DreamStream-1 committed on
Commit
822ac82
·
verified ·
1 Parent(s): e616c89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -71
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import gradio as gr
 
2
  from sentence_transformers import SentenceTransformer, util
3
  import docx
4
  import os
5
  from PyPDF2 import PdfReader
6
  import re
7
- from google.cloud import language_v1
8
- from google.oauth2 import service_account
9
 
10
  # Load pre-trained model for sentence embedding
11
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
@@ -13,13 +12,12 @@ model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
13
  # Define maximum number of resumes
14
  MAX_RESUMES = 10
15
 
16
- # Google Cloud NLP Client Initialization
17
def init_nlp_client():
    """Build a Google Cloud Natural Language client from a service-account secret.

    The service-account JSON is read from the ``GOOGLE_API_KEY_SECRET``
    environment variable. Gradio has no ``gr.Secret`` accessor, and
    ``from_service_account_info`` expects an already-parsed mapping, so the
    raw secret is JSON-decoded here.

    Returns:
        A ``language_v1.LanguageServiceClient`` using those credentials.

    Raises:
        RuntimeError: If the secret is unset/empty or is not valid JSON.
    """
    import json  # local import: nothing else in the file needs it

    raw_secret = os.environ.get('GOOGLE_API_KEY_SECRET')
    if not raw_secret:
        raise RuntimeError("GOOGLE_API_KEY_SECRET is not set")

    try:
        service_account_info = json.loads(raw_secret)
    except json.JSONDecodeError as err:
        raise RuntimeError("GOOGLE_API_KEY_SECRET is not valid JSON") from err

    credentials = service_account.Credentials.from_service_account_info(service_account_info)
    return language_v1.LanguageServiceClient(credentials=credentials)
21
 
22
- # Extract text from resume (handles .txt, .pdf, .docx)
23
  def extract_text_from_resume(resume_file):
24
  file_extension = os.path.splitext(resume_file)[1].lower()
25
  if file_extension not in ['.txt', '.pdf', '.docx']:
@@ -52,64 +50,6 @@ def read_docx_file(file_path):
52
  text += para.text
53
  return text
54
 
55
- # Extract candidate name from resume text
56
def extract_candidate_name(resume_text):
    """Return the first "Capitalized Capitalized" word pair found in the text.

    This is a heuristic: the first two consecutive words that each start
    with an uppercase ASCII letter followed by lowercase letters, separated
    by a single space, are taken to be the candidate's name.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        The matched two-word phrase, or ``"Unknown Candidate"`` when the
        text contains no such pair.
    """
    # search() stops at the first hit instead of collecting every match in
    # the document just to discard all but the first.
    first_pair = re.search(r'\b([A-Z][a-z]+ [A-Z][a-z]+)\b', resume_text)
    if first_pair is None:
        return "Unknown Candidate"
    return first_pair.group(1)
62
-
63
- # Function to extract email and contact from resume using regex
64
def extract_contact_info(resume_text):
    """Extract the first email address and phone number from resume text.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A dict that may contain ``'email'`` and/or ``'contact'`` (phone
        number). A key is omitted when nothing matching was found.
    """
    contact_info = {}

    # The domain must contain at least one dot-separated label and cannot end
    # in a dot, so sentence punctuation after an address ("... x@y.com.") is
    # not captured. '+' is allowed in the local part (e.g. "jane+jobs@...").
    email_match = re.search(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', resume_text)
    if email_match:
        contact_info['email'] = email_match.group(0)

    # Basic phone formats: optional '+', country/area groups, then 3-4 and 4
    # digits, with optional spaces, hyphens or parentheses between groups.
    phone_match = re.search(
        r'\+?\d{1,4}[\s\-]?\(?\d{1,3}\)?[\s\-]?\d{3,4}[\s\-]?\d{4}',
        resume_text,
    )
    if phone_match:
        contact_info['contact'] = phone_match.group(0)

    return contact_info
80
-
81
- # Function to extract entities (name, phone, email) using the Google NLP API
82
def extract_entities(resume_text):
    """Extract name, phone and email entities via Google Cloud Natural Language.

    The resume text is sent as a plain-text document to ``analyze_entities``.
    That call takes only the document, so no prompt text is built or sent.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        A dict with any of the keys ``'name'``, ``'contact'`` and ``'email'``.
        When several entities share a type, the last one in the response wins.
    """
    client = init_nlp_client()

    document = language_v1.Document(
        content=resume_text,
        type_=language_v1.Document.Type.PLAIN_TEXT,
    )
    response = client.analyze_entities(request={'document': document})

    # Entity type name -> key in the returned dict.
    # NOTE(review): confirm 'EMAIL' is actually a member of
    # language_v1.Entity.Type; if not, that mapping never fires.
    field_for_type = {
        'PERSON': 'name',
        'PHONE_NUMBER': 'contact',
        'EMAIL': 'email',
    }

    entities = {}
    for entity in response.entities:
        entity_type = language_v1.Entity.Type(entity.type_).name
        field = field_for_type.get(entity_type)
        if field is not None:
            entities[field] = entity.name

    return entities
112
-
113
  # Extract leadership experience (looking for keywords like manager, team lead, leadership)
114
  def extract_leadership_experience(resume_text):
115
  leadership_keywords = ['manager', 'management', 'team lead', 'supervised', 'leadership', 'head', 'coordinator']
@@ -118,6 +58,43 @@ def extract_leadership_experience(resume_text):
118
  return "Has leadership experience"
119
  return "No leadership experience"
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # Function to check similarity between resumes and job description
122
  def check_similarity(job_description, resume_files):
123
  results = []
@@ -126,7 +103,7 @@ def check_similarity(job_description, resume_files):
126
  for resume_file in resume_files:
127
  resume_text = extract_text_from_resume(resume_file)
128
  if not resume_text:
129
- results.append((resume_file.name, 0, "Not Eligible", None, "No leadership experience"))
130
  continue
131
 
132
  # Check for similarity between resume and job description
@@ -139,13 +116,12 @@ def check_similarity(job_description, resume_files):
139
  # Extract leadership experience
140
  leadership_experience = extract_leadership_experience(resume_text)
141
 
142
- # Extract name, email, and contact using Google NLP or regex
143
- contact_info = extract_contact_info(resume_text)
144
- nlp_entities = extract_entities(resume_text)
145
 
146
  # Set a higher similarity threshold for eligibility
147
  if similarity_score >= 0.50:
148
- candidate_name = nlp_entities.get('name', extract_candidate_name(resume_text))
149
  results.append((
150
  resume_file.name,
151
  similarity_percentage,
 
1
  import gradio as gr
2
+ import requests
3
  from sentence_transformers import SentenceTransformer, util
4
  import docx
5
  import os
6
  from PyPDF2 import PdfReader
7
  import re
 
 
8
 
9
  # Load pre-trained model for sentence embedding
10
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
12
  # Define maximum number of resumes
13
  MAX_RESUMES = 10
14
 
15
+ # Function to fetch Google API key from Hugging Face Secrets
16
def get_google_api_key():
    """Return the Google API key from the ``GOOGLE_API_KEY`` environment variable.

    Gradio exposes no ``gr.secret`` function, so the key is read from the
    process environment, where Hugging Face Spaces place configured secrets.

    Returns:
        The key as a string, or ``None`` when the variable is not set.
    """
    return os.environ.get('GOOGLE_API_KEY')
 
19
 
20
+ # Function to extract text from resume (handles .txt, .pdf, .docx)
21
  def extract_text_from_resume(resume_file):
22
  file_extension = os.path.splitext(resume_file)[1].lower()
23
  if file_extension not in ['.txt', '.pdf', '.docx']:
 
50
  text += para.text
51
  return text
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Extract leadership experience (looking for keywords like manager, team lead, leadership)
54
  def extract_leadership_experience(resume_text):
55
  leadership_keywords = ['manager', 'management', 'team lead', 'supervised', 'leadership', 'head', 'coordinator']
 
58
  return "Has leadership experience"
59
  return "No leadership experience"
60
 
61
+ # Extract candidate details (name, email, phone) via a remote entity-analysis API
62
def extract_entities_via_gemini(resume_text):
    """Extract name, email and phone from resume text via a remote NLP API.

    The text is POSTed as a plain-text document and the ``entities`` list in
    the JSON response is reduced to the fields of interest.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        On success, a dict with any of the keys ``'name'``, ``'email'`` and
        ``'contact'`` (the last entity of each type wins). On any failure
        (network error, non-200 status, or a non-JSON body), the dict
        ``{"error": "Failed to extract entities from resume"}``.
    """
    api_key = get_google_api_key()
    # NOTE(review): placeholder endpoint carried over from the original code.
    # The request/response shape looks like the Cloud Natural Language
    # `documents:analyzeEntities` method; confirm the real host and whether an
    # API key may be sent as a Bearer token before relying on this.
    endpoint = "https://gemini.googleapis.com/v1/documents:analyzeEntities"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "document": {
            "type": "PLAIN_TEXT",
            "content": resume_text
        }
    }

    try:
        # A timeout keeps one unresponsive call from hanging the whole batch.
        response = requests.post(endpoint, headers=headers, json=payload, timeout=30)
    except requests.RequestException:
        return {"error": "Failed to extract entities from resume"}

    if response.status_code != 200:
        return {"error": "Failed to extract entities from resume"}

    try:
        body = response.json()
    except ValueError:
        # A 200 response with a non-JSON body is still a failed extraction.
        return {"error": "Failed to extract entities from resume"}

    entities = body.get('entities', []) if isinstance(body, dict) else []

    # Entity type reported by the API -> key in the returned dict.
    field_for_type = {
        'PERSON': 'name',
        'EMAIL': 'email',
        'PHONE_NUMBER': 'contact',
    }

    extracted_info = {}
    for entity in entities:
        if not isinstance(entity, dict):
            continue
        field = field_for_type.get(entity.get('type'))
        # Skip malformed entries rather than raising KeyError.
        if field is not None and 'name' in entity:
            extracted_info[field] = entity['name']

    return extracted_info
97
+
98
  # Function to check similarity between resumes and job description
99
  def check_similarity(job_description, resume_files):
100
  results = []
 
103
  for resume_file in resume_files:
104
  resume_text = extract_text_from_resume(resume_file)
105
  if not resume_text:
106
+ results.append((resume_file.name, 0, "Not Eligible", None, "No leadership experience", "No Email", "No Contact"))
107
  continue
108
 
109
  # Check for similarity between resume and job description
 
116
  # Extract leadership experience
117
  leadership_experience = extract_leadership_experience(resume_text)
118
 
119
+ # Extract name, email, and contact info using Google Gemini API
120
+ contact_info = extract_entities_via_gemini(resume_text)
 
121
 
122
  # Set a higher similarity threshold for eligibility
123
  if similarity_score >= 0.50:
124
+ candidate_name = contact_info.get('name', 'Unknown Candidate')
125
  results.append((
126
  resume_file.name,
127
  similarity_percentage,