DreamStream-1 committed on
Commit
db96d15
·
verified ·
1 Parent(s): 0238247

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -45
app.py CHANGED
@@ -4,73 +4,67 @@ import csv
4
  import re
5
  import requests
6
  from sentence_transformers import SentenceTransformer, util
7
- from PyPDF2 import PdfReader # For handling PDF files
8
 
9
  # Initialize Sentence-Transformer model
10
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
11
 
12
- # Get Google API key from Hugging Face Secrets
13
- google_api_key = os.getenv("GOOGLE_API_KEY") # Get the key from environment variables
14
-
15
  # Define a function to extract leadership experience from resume text
16
  def extract_leadership_experience(resume_text):
 
17
  leadership_keywords = [
18
  "led", "managed", "team lead", "supervised", "coordinated", "directed",
19
  "oversaw", "responsible for", "led a team", "executed", "mentored",
20
  "project manager", "leadership role", "department head", "team captain"
21
  ]
22
 
 
23
  resume_text_lower = resume_text.lower()
 
 
24
  leadership_experience = []
25
  for keyword in leadership_keywords:
26
  if re.search(r"\b" + re.escape(keyword) + r"\b", resume_text_lower):
27
  leadership_experience.append(keyword)
28
 
29
- return ", ".join(set(leadership_experience)) if leadership_experience else "No leadership experience found"
30
-
31
- # Function to extract name, email, and contact information using Google API
32
- def extract_entities_via_google(resume_text):
33
- endpoint = "https://language.googleapis.com/v1/documents:analyzeEntities" # Google NLP API endpoint
34
- headers = {
35
- "Content-Type": "application/json",
36
- "Authorization": f"Bearer {google_api_key}" # Use the Google API key here
37
- }
38
-
39
- # Define request payload for Google API
40
- payload = {
41
- "document": {
42
- "type": "PLAIN_TEXT",
43
- "content": resume_text
44
- }
45
- }
46
-
47
- # Make the API request to Google
48
- response = requests.post(endpoint, json=payload, headers=headers)
49
 
 
50
  if response.status_code == 200:
51
  data = response.json()
52
- entities = data.get("entities", [])
53
- # Extracting name, email, and contact details (mocked here as needed)
54
- name = next((entity['name'] for entity in entities if entity['type'] == 'PERSON'), 'Unknown')
55
- email = next((entity['name'] for entity in entities if 'email' in entity['name'].lower()), 'No Email')
56
- contact = next((entity['name'] for entity in entities if 'phone' in entity['name'].lower()), 'No Contact')
57
- return {"name": name, "email": email, "contact": contact}
58
  else:
59
- return {"name": "Unknown", "email": "No Email", "contact": "No Contact"}
 
 
 
 
60
 
61
- # Function to extract text from resumes (.txt, .pdf)
62
  def extract_text_from_resume(resume_file):
 
63
  try:
64
  if resume_file.name.endswith('.txt'):
65
  with open(resume_file.name, 'r') as file:
66
  return file.read()
67
  elif resume_file.name.endswith('.pdf'):
68
- # Use PyPDF2 to extract text from PDF
69
- pdf_reader = PdfReader(resume_file.name)
70
- text = ""
71
- for page in pdf_reader.pages:
72
- text += page.extract_text()
73
- return text
74
  else:
75
  return ""
76
  except Exception as e:
@@ -101,13 +95,16 @@ def check_similarity(job_description, resume_files):
101
  resume_emb = model.encode(resume_text, convert_to_tensor=True)
102
  similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
103
 
 
104
  similarity_percentage = similarity_score * 100
105
 
 
106
  leadership_experience = extract_leadership_experience(resume_text)
107
 
108
- # Extract name, email, and contact info using Google API
109
- contact_info = extract_entities_via_google(resume_text)
110
 
 
111
  if similarity_score >= 0.50:
112
  candidate_name = contact_info.get('name', 'Unknown Candidate')
113
  results.append((
@@ -130,23 +127,31 @@ def check_similarity(job_description, resume_files):
130
  contact_info.get('contact', 'No Contact')
131
  ))
132
 
133
- # Return results and CSV file path
134
  csv_file_path = save_results_to_csv(results)
135
  return results, csv_file_path
136
 
137
- # Gradio interface
 
 
 
 
138
  with gr.Blocks() as demo:
139
  with gr.Row():
140
  job_desc_input = gr.Textbox(label="Job Description", lines=3)
141
  resume_input = gr.Files(label="Upload Resumes", file_count="multiple", file_types=[".pdf", ".txt"])
142
 
143
  results_output = gr.Dataframe(headers=["Resume Name", "Similarity Score (%)", "Eligibility", "Name", "Leadership Experience", "Email", "Contact"])
144
-
145
  # Define the button to trigger similarity check
146
  check_button = gr.Button("Check Similarity")
147
-
148
  # Set up button's action
149
- check_button.click(check_similarity, inputs=[job_desc_input, resume_input], outputs=[results_output, gr.File(label="Download CSV", value=save_results_to_csv)])
 
 
 
 
150
 
151
  # Launch Gradio interface
152
  demo.launch()
 
4
  import re
5
  import requests
6
  from sentence_transformers import SentenceTransformer, util
 
7
 
8
  # Initialize Sentence-Transformer model
9
  model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
10
 
 
 
 
11
  # Define a function to extract leadership experience from resume text
12
def extract_leadership_experience(resume_text):
    """Return the leadership keywords found in a resume as one string.

    Matching is case-insensitive and anchored on word boundaries, so
    "led" matches "Led a team" but not "misled".

    Args:
        resume_text: Plain text of the resume. ``None`` or an empty
            string is treated as a resume with no matches.

    Returns:
        The matched keywords joined by ", " in the order they appear in
        the keyword list below, or "No leadership experience found" when
        nothing matches.
    """
    # Leadership-related keywords/phrases, in reporting order.
    leadership_keywords = [
        "led", "managed", "team lead", "supervised", "coordinated", "directed",
        "oversaw", "responsible for", "led a team", "executed", "mentored",
        "project manager", "leadership role", "department head", "team captain",
    ]

    if not resume_text:
        return "No leadership experience found"

    # Lower-case once so every keyword comparison is case-insensitive.
    resume_text_lower = resume_text.lower()

    # The keyword list has no duplicates, so a list (rather than a set)
    # keeps the output order stable from run to run.
    matched = [
        keyword
        for keyword in leadership_keywords
        if re.search(r"\b" + re.escape(keyword) + r"\b", resume_text_lower)
    ]

    if matched:
        return ", ".join(matched)
    return "No leadership experience found"
34
+
35
+ # Define a function to extract contact info using Gemini API (simulated here)
36
def extract_entities_via_gemini(resume_text):
    """Request name, email and contact details for a resume from a remote API.

    This is a simulation of the Google Gemini API: the endpoint below is a
    placeholder and must be replaced with the actual API call.

    Args:
        resume_text: Plain text of the resume, sent as the form field "text".

    Returns:
        A dict with the keys "name", "email" and "contact". Each field
        falls back to "Unknown", "No Email" or "No Contact" when it is
        missing from the response. The full set of fallbacks is returned
        when the request fails, times out, returns a non-200 status, or
        returns a body that is not a JSON object.
    """
    def _fallback():
        # Build a fresh dict on every call so callers can mutate the result.
        return {
            "name": "Unknown",
            "email": "No Email",
            "contact": "No Contact",
        }

    try:
        response = requests.post(
            "https://your-gemini-api-endpoint.com",  # Replace with actual endpoint
            data={"text": resume_text},
            timeout=30,  # without a timeout an unresponsive endpoint blocks forever
        )
    except requests.RequestException:
        # Connection errors and timeouts are reported like a failed lookup.
        return _fallback()

    if response.status_code != 200:
        return _fallback()

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        return _fallback()

    if not isinstance(data, dict):
        return _fallback()

    return {
        "name": data.get("name", "Unknown"),
        "email": data.get("email", "No Email"),
        "contact": data.get("contact", "No Contact"),
    }
57
 
58
+ # Function to extract text from resumes (assumes .pdf or .txt files)
59
  def extract_text_from_resume(resume_file):
60
+ # Add your extraction logic here based on the file type (e.g., PDF, DOCX, TXT)
61
  try:
62
  if resume_file.name.endswith('.txt'):
63
  with open(resume_file.name, 'r') as file:
64
  return file.read()
65
  elif resume_file.name.endswith('.pdf'):
66
+ # Add logic to extract text from PDF
67
+ return "Extracted text from PDF file"
 
 
 
 
68
  else:
69
  return ""
70
  except Exception as e:
 
95
  resume_emb = model.encode(resume_text, convert_to_tensor=True)
96
  similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
97
 
98
+ # Convert similarity score to percentage
99
  similarity_percentage = similarity_score * 100
100
 
101
+ # Extract leadership experience
102
  leadership_experience = extract_leadership_experience(resume_text)
103
 
104
+ # Extract name, email, and contact info using Google Gemini API
105
+ contact_info = extract_entities_via_gemini(resume_text)
106
 
107
+ # Set a higher similarity threshold for eligibility
108
  if similarity_score >= 0.50:
109
  candidate_name = contact_info.get('name', 'Unknown Candidate')
110
  results.append((
 
127
  contact_info.get('contact', 'No Contact')
128
  ))
129
 
130
+ # Now return results and the file path of the CSV
131
  csv_file_path = save_results_to_csv(results)
132
  return results, csv_file_path
133
 
134
+ # Function to download the results as a CSV file
135
def download_results(results=None):
    """Write the results to a CSV file and return what save_results_to_csv returns.

    Args:
        results: Result rows to save. Defaults to ``None``, which is
            saved as an empty result list. The default exists because
            this function is also passed as a component's callable
            ``value`` in this file, where it is invoked without arguments.

    Returns:
        The return value of ``save_results_to_csv``.
    """
    if results is None:
        results = []
    return save_results_to_csv(results)
137
+
138
+ # Define Gradio Interface
139
with gr.Blocks() as demo:
    with gr.Row():
        job_desc_input = gr.Textbox(label="Job Description", lines=3)
        resume_input = gr.Files(label="Upload Resumes", file_count="multiple", file_types=[".pdf", ".txt"])

    results_output = gr.Dataframe(headers=["Resume Name", "Similarity Score (%)", "Eligibility", "Name", "Leadership Experience", "Email", "Contact"])

    # Define the button to trigger similarity check
    check_button = gr.Button("Check Similarity")

    # Output slot for the generated CSV. It starts empty and is filled by
    # check_similarity's second return value. No callable is passed as
    # `value`, because a callable value is invoked with no arguments to
    # produce the initial value, before any results exist.
    csv_output = gr.File(label="Download CSV")

    # Set up button's action
    check_button.click(
        check_similarity,
        inputs=[job_desc_input, resume_input],
        outputs=[results_output, csv_output],
    )

# Launch Gradio interface
demo.launch()