DreamStream-1 committed on
Commit
a6fbdfa
·
verified ·
1 Parent(s): 8d917ed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import docx
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from PyPDF2 import PdfReader
7
+ import re
8
+ from datetime import datetime
9
+
10
# Sentence-embedding model used to encode the job description and resumes.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Largest number of resumes accepted in a single request.
MAX_RESUMES = 10

# Terms that signal a managerial or leadership role in a resume.
MANAGERIAL_KEYWORDS = [
    "manager",
    "team leader",
    "lead",
    "supervisor",
    "director",
    "head of",
    "leadership",
]
18
+
19
def load_job_description(job_desc_file):
    """Read the job description text from a file path.

    Args:
        job_desc_file: Path to a plain-text job description file. A falsy
            value (``None`` or ``""``) is treated like a missing file.

    Returns:
        The decoded file contents, or one of two sentinel messages:
        ``"Job description file not found."`` when the path is missing or
        is not a regular file, and ``"Job description is empty."`` when
        the file contains only whitespace. Callers should compare against
        these messages before using the result as a job description.
    """
    if not job_desc_file:
        return "Job description file not found."

    try:
        # Open directly instead of checking os.path.exists() first: that
        # avoids a check-then-open race. Decoding is pinned to UTF-8
        # (BOM-tolerant) so the result does not depend on the host locale,
        # and undecodable bytes are replaced rather than raising.
        with open(job_desc_file, 'r', encoding='utf-8-sig', errors='replace') as file:
            job_description = file.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return "Job description file not found."

    if not job_description.strip():
        return "Job description is empty."
    return job_description
28
+
29
def check_similarity(job_description, resume_files):
    """Score each resume against the job description.

    Args:
        job_description: Job description text to compare against.
        resume_files: Iterable of resumes, each either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``.

    Returns:
        A list with one tuple per resume:
        ``(resume name, similarity score, eligibility, candidate name,
        leadership experience)``. Resumes with no usable text get a score
        of 0 and ``"Not Eligible"``; the last two fields are ``None`` for
        every ineligible resume.
    """
    # Messages the text extractor may hand back instead of resume content;
    # they must not be embedded and scored as if they were a resume.
    extraction_failures = {
        "Unsupported file format",
        "Failed to read the resume text.",
    }

    results = []
    job_emb = model.encode(job_description, convert_to_tensor=True)

    for resume_file in resume_files:
        # Uploads may be plain path strings (which have no ``.name``) or
        # file-like wrappers; resolve both to a path string up front.
        if isinstance(resume_file, (str, os.PathLike)):
            resume_path = os.fspath(resume_file)
        else:
            resume_path = getattr(resume_file, "name", None) or str(resume_file)

        resume_text = extract_text_from_resume(resume_path)
        if (
            not resume_text
            or not resume_text.strip()
            or resume_text in extraction_failures
        ):
            results.append((resume_path, 0, "Not Eligible", None, None))
            continue

        resume_emb = model.encode(resume_text, convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()

        # Extract leadership experience from resume
        leadership_experience = extract_leadership_experience(resume_text)

        # Candidates with any detected leadership experience get a flat
        # bonus on top of the cosine similarity.
        if leadership_experience > 0:
            similarity_score += 0.1

        # Eligibility threshold applied to the (possibly boosted) score.
        if similarity_score >= 0.50:
            candidate_name = extract_candidate_name(resume_text)
            results.append((resume_path, similarity_score, "Eligible", candidate_name, leadership_experience))
        else:
            results.append((resume_path, similarity_score, "Not Eligible", None, None))

    return results
58
+
59
def extract_text_from_resume(resume_file):
    """Extract the text of a resume stored as .txt, .pdf or .docx.

    Args:
        resume_file: Filesystem path (``str`` / ``os.PathLike``) of the
            resume, or an object exposing that path as ``.name``.

    Returns:
        The extracted text, or an empty string when the path is missing
        or the extension is not supported. An empty result is falsy, so
        callers that test ``if not resume_text`` skip such files instead
        of treating an error message as resume content.
    """
    if isinstance(resume_file, (str, os.PathLike)):
        file_path = os.fspath(resume_file)
    else:
        file_path = getattr(resume_file, "name", None)
    if not file_path:
        return ""

    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.txt':
        return read_text_file(file_path)
    if file_extension == '.pdf':
        return read_pdf_file(file_path)
    if file_extension == '.docx':
        return read_docx_file(file_path)

    # Unsupported format: report "no text" rather than a truthy message.
    return ""
73
+
74
def read_text_file(file_path):
    """Return the full contents of a plain-text file.

    The file is decoded as UTF-8 regardless of the host locale. A leading
    byte-order mark is dropped and undecodable bytes are replaced, so a
    resume saved in another encoding still yields text instead of raising
    ``UnicodeDecodeError``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents as a string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as file:
        return file.read()
77
+
78
def read_pdf_file(file_path):
    """Return the text of every page of a PDF, one page per line group.

    Pages are joined with a newline so the last word of one page does not
    fuse with the first word of the next. A page whose extraction yields
    nothing (an empty or ``None`` result) contributes an empty string
    instead of breaking the concatenation.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages joined by newlines.
    """
    reader = PdfReader(file_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)
84
+
85
def read_docx_file(file_path):
    """Return the text of a .docx document, one paragraph per line.

    Paragraphs are joined with a newline. Concatenating them directly
    would fuse the last word of one paragraph with the first word of the
    next, which breaks word-boundary matching on the extracted text.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The text of all paragraphs joined by newlines.
    """
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
91
+
92
def extract_candidate_name(resume_text):
    """Guess the candidate's name from resume text.

    The name is taken to be the first pair of adjacent capitalised words
    ("John Smith"). This is a heuristic: any earlier capitalised pair in
    the text is returned instead of the real name.

    Args:
        resume_text: Text extracted from the resume; may be empty or None.

    Returns:
        The first matching word pair, or ``"Unknown Candidate"`` when the
        text is empty or contains no such pair.
    """
    if not resume_text:
        return "Unknown Candidate"

    # search() stops at the first hit; there is no need to collect every
    # match in the document just to keep the first one.
    match = re.search(r'\b([A-Z][a-z]+ [A-Z][a-z]+)\b', resume_text)
    if match:
        return match.group(1)
    return "Unknown Candidate"
99
+
100
def extract_leadership_experience(resume_text):
    """Estimate the years of managerial experience described in a resume.

    Each line that mentions one of ``MANAGERIAL_KEYWORDS`` is scanned for
    year ranges such as ``2015 - 2019``, ``Jan 2015 to Mar 2019``,
    ``01/2015 - 03/2019`` or ``2018 - Present``. The duration of a range
    is ``end year - start year``, with an open-ended range ending in the
    current year. The longest duration found is returned.

    Args:
        resume_text: Text extracted from the resume; may be empty or None.

    Returns:
        The longest leadership stint in whole years, or 0 when no
        managerial line carries a usable year range. A range that starts
        and ends in the same year counts as 1, so any dated leadership
        role yields a positive value.
    """
    if not resume_text or not MANAGERIAL_KEYWORDS:
        return 0

    keyword_re = re.compile(
        r"\b(?:" + "|".join(re.escape(word) for word in MANAGERIAL_KEYWORDS) + r")\b",
        re.IGNORECASE,
    )
    # start year, a dash or "to", an optional month ("Mar " / "03/"),
    # then an end year or an open-ended marker.
    range_re = re.compile(
        r"\b(?P<start>(?:19|20)\d{2})\s*(?:[-\u2013\u2014]|\bto\b)\s*"
        r"(?:[A-Za-z]{3,9}\.?\s+|\d{1,2}[/.])?"
        r"(?P<end>(?:19|20)\d{2}|present|current|now)\b",
        re.IGNORECASE,
    )
    this_year = datetime.now().year

    experience = 0
    for line in resume_text.splitlines():
        if not keyword_re.search(line):
            continue
        for date_range in range_re.finditer(line):
            start_year = int(date_range.group("start"))
            end_token = date_range.group("end")
            end_year = int(end_token) if end_token.isdigit() else this_year
            if end_year < start_year:
                # Reversed or future-dated range: not a usable duration.
                continue
            experience = max(experience, max(end_year - start_year, 1))
    return experience
110
+
111
def _error_result(message):
    """Wrap an error message as a (one-row table, no file) result pair."""
    return pd.DataFrame({"Error": [message]}), None


def process_files(job_desc, resumes):
    """Screen the uploaded resumes against the uploaded job description.

    Args:
        job_desc: Path of the job description file, or a falsy value when
            nothing was uploaded.
        resumes: Sequence of uploaded resumes, or a falsy value when
            nothing was uploaded.

    Returns:
        A ``(table, csv_path)`` pair. On success ``table`` is a DataFrame
        with one row per resume and ``csv_path`` is the path of a CSV copy
        written to the temporary directory. On any failure ``table`` is a
        one-row DataFrame whose single ``"Error"`` column holds the
        message and ``csv_path`` is ``None``, so both declared outputs
        always receive a value.
    """
    import tempfile  # local import: only this function needs it

    try:
        # Check for missing uploads before len(): resumes may be None.
        if not job_desc or not resumes:
            return _error_result("Please provide all necessary files.")

        # Check if the number of resumes is within the allowed limit
        if len(resumes) > MAX_RESUMES:
            return _error_result(f"Please upload no more than {MAX_RESUMES} resumes.")

        # Load the job description; the loader reports problems as
        # messages, which must not be scored as a job description.
        job_desc_text = load_job_description(job_desc)
        if job_desc_text in ("Job description file not found.", "Job description is empty."):
            return _error_result(job_desc_text)

        # Check similarity
        results = check_similarity(job_desc_text, resumes)

        # Prepare the results in tabular form
        df = pd.DataFrame(results, columns=["Resume File", "Similarity Score", "Eligibility", "Candidate Name", "Leadership Experience"])

        # Write the download copy to a unique file in the platform's
        # temporary directory; a timestamp alone can collide when two
        # requests finish within the same second.
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        fd, output_filename = tempfile.mkstemp(prefix=f"similarity_results_{stamp}_", suffix=".csv")
        os.close(fd)
        df.to_csv(output_filename, index=False)

        return df, output_filename

    except Exception as e:
        # Top-level boundary: surface any processing error to the user.
        return _error_result(f"Error processing files: {str(e)}")
141
+
142
+
143
# Input widgets: one job-description file and a batch of resume files,
# both delivered to the handler as file paths.
job_desc_input = gr.File(type="filepath", label="Upload Job Description (TXT)")
resumes_input = gr.Files(type="filepath", label="Upload Resumes (TXT, DOCX, PDF)")

# Output widgets: the results table and the downloadable results file.
results_output = gr.Dataframe(label="Analysis Results")
download_output = gr.File(label="Download Final Results")

# Wire the widgets to process_files and start the app.
interface = gr.Interface(
    title="HR Assistant - Resume Screening",
    description="Upload job description and resumes to screen candidates and download the results in a tabular format.",
    fn=process_files,
    inputs=[job_desc_input, resumes_input],
    outputs=[results_output, download_output],
)

interface.launch()