Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from tkinter import Tk, filedialog
|
| 3 |
+
from PyQt5.QtWidgets import QApplication, QFileDialog
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from transformers import AutoTokenizer, AutoModel
|
| 7 |
+
import google.generativeai as genai
|
| 8 |
+
import os
|
| 9 |
+
import io
|
| 10 |
+
import base64
|
| 11 |
+
import json
|
| 12 |
+
import pandas as pd
|
| 13 |
+
import smtplib
|
| 14 |
+
from email.mime.text import MIMEText
|
| 15 |
+
from email.mime.multipart import MIMEMultipart
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of a transformer output, ignoring padding.

    Args:
        model_output: Hugging Face model output; element 0 holds the token
            embeddings with shape (batch, seq_len, hidden).
        attention_mask: Tensor of shape (batch, seq_len) — 1 for real tokens,
            0 for padding.

    Returns:
        Tensor of shape (batch, hidden): the mask-weighted mean embedding
        per sequence.
    """
    embeddings = model_output[0]
    # Broadcast the mask over the hidden dimension so padded positions
    # contribute nothing to the sum.
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    summed = torch.sum(embeddings * mask, 1)
    # Clamp avoids division by zero for all-padding rows.
    token_counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / token_counts
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# SECURITY: an API key was previously hard-coded on this line and committed to
# the repository — treat it as leaked and rotate it. The key is now read from
# the GOOGLE_API_KEY environment variable instead of being embedded in source.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))
|
| 26 |
+
def ats_extractor(file_path):
    """Extract structured resume information from a PDF using Gemini.

    Args:
        file_path (str): Path to the resume file (PDF).

    Returns:
        str: The raw model response text. It is expected to contain a JSON
        object with the requested fields; callers locate and parse the JSON
        themselves (this function does NOT return a parsed dict — the
        original docstring claimed a dict, which was inaccurate).
    """
    # Read the raw bytes of the PDF file.
    with open(file_path, 'rb') as pdf_file:
        resume_data = pdf_file.read()

    # Parsing instructions for the model. The original prompt misspelled
    # "Experience Details" as "Experiece Details"; the caller already accepts
    # both spellings of the key, so correcting the prompt is backward-compatible.
    prompt = '''
You are an AI bot specializing in resume parsing. Extract the following details:
1. Full Name
2. Email ID
3. GitHub Portfolio
4. LinkedIn ID
5. Phone Number
6. Address
7. Education Details
8. Employment Details
9. Experience Details
10. Technical Skills
11. Soft Skills
Provide the output in JSON format.
'''

    # Inline document payload for the Gemini API: mime type plus the
    # base64-encoded PDF bytes.
    pdf_content = {
        "mime_type": "application/pdf",
        "data": base64.b64encode(resume_data).decode(),
    }

    # Initialize the model.
    model = genai.GenerativeModel('gemini-1.5-flash')

    # Send the document and the instructions in a single request.
    response = model.generate_content([pdf_content, prompt])

    # Return the raw text; JSON extraction happens at the call site.
    return response.text
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Sentence-embedding model used for resume/job-description scoring.
# (AutoTokenizer/AutoModel are already imported at the top of this file;
# the redundant duplicate `from transformers import ...` was removed.)
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')


# Streamlit UI: page title, section header, and the job-description input box.
st.title('Resume Scoring')

st.header("Job Description")

user_input = st.text_input("Enter your job description:")
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
if st.button("Submit"):
    source_sentence = str(user_input)

    # Folder holding the candidate resume PDFs.
    # NOTE(review): hard-coded local path — consider making this configurable.
    folder_path = 'E:/fyp progress/Resume'
    resumes = []

    # Collect every regular file in the folder as a resume candidate.
    if os.path.exists(folder_path):
        print(f"Accessing folder: {folder_path}")
        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            if os.path.isfile(file_path):
                resumes.append(file_path)

    # BUGFIX: the original used `i` both for this loop and for the inner
    # education/experience index loops, shadowing the resume path mid-body.
    for resume_path in resumes:
        parsed_data = ats_extractor(resume_path)

        # The model response wraps a JSON object in prose; slice from the
        # first '{' to the last '}' and parse that span.
        start_index = parsed_data.find('{')
        end_index = parsed_data.rfind('}') + 1
        json_part = parsed_data[start_index:end_index]
        try:
            json_data = json.loads(json_part)
        except json.JSONDecodeError as exc:
            # Skip resumes whose response is not valid JSON instead of crashing
            # the whole scoring run.
            st.warning(f"Could not parse model output for {resume_path}: {exc}")
            continue

        # Candidate fields; .get with defaults so a missing key degrades
        # gracefully instead of raising KeyError.
        location = json_data.get('Address', '')
        name = json_data.get('Full Name', '')
        email = json_data.get('Email ID', '')

        # Skills: combined technical + soft skills as one comma-separated string.
        skills = ', '.join(
            str(s)
            for s in json_data.get('Technical Skills', []) + json_data.get('Soft Skills', [])
        )

        # Education: flatten each entry's degree / university / dates.
        education_parts = []
        for entry in json_data.get('Education Details', []):
            education_parts.append(str(entry.get('Degree', '')))
            education_parts.append(str(entry.get('University', '')))
            education_parts.append(str(entry.get('Dates', '')))
        education = ', '.join(education_parts)

        # Experience: the model sometimes misspells the key — accept both.
        experience_key = (
            'Experience Details' if 'Experience Details' in json_data else 'Experiece Details'
        )
        experience = json_data.get(experience_key, []) + json_data.get('Employment Details', [])

        experience_parts = []
        for entry in experience:
            experience_parts.append(str(entry.get('title', '')))
            experience_parts.append(str(entry.get('description', '')))
            experience_parts.append(str(entry.get('years', '')))
        data_experience = ', '.join(experience_parts)

        # BUGFIX: the original scored against str(experience) — the raw list of
        # dicts — even though data_experience had just been built; score the
        # flattened text instead.
        experience_rate = [source_sentence, data_experience]
        skill_rate = [source_sentence, str(skills)]
        education_rate = [source_sentence, str(education)]
        loc_rate = [source_sentence, str(location)]

        # Tokenize each (job description, candidate field) pair together so the
        # pair shares one padded batch.
        encoded_exp = tokenizer(experience_rate, padding=True, truncation=True, return_tensors='pt')
        encoded_skill = tokenizer(skill_rate, padding=True, truncation=True, return_tensors='pt')
        encoded_edu = tokenizer(education_rate, padding=True, truncation=True, return_tensors='pt')
        encoded_loc = tokenizer(loc_rate, padding=True, truncation=True, return_tensors='pt')

        # Compute token embeddings without gradient tracking (inference only).
        with torch.no_grad():
            exp_output = model(**encoded_exp)
            skill_output = model(**encoded_skill)
            edu_output = model(**encoded_edu)
            loc_output = model(**encoded_loc)

        # Mean-pool to sentence embeddings, then L2-normalize.
        exp_embeddings = F.normalize(mean_pooling(exp_output, encoded_exp['attention_mask']), p=2, dim=1)
        skill_embeddings = F.normalize(mean_pooling(skill_output, encoded_skill['attention_mask']), p=2, dim=1)
        edu_embeddings = F.normalize(mean_pooling(edu_output, encoded_edu['attention_mask']), p=2, dim=1)
        loc_embeddings = F.normalize(mean_pooling(loc_output, encoded_loc['attention_mask']), p=2, dim=1)

        # Row 0 of each batch is the job description; row 1 the candidate field.
        source_embedding = exp_embeddings[0]
        cosine_similarities_exp = F.cosine_similarity(source_embedding.unsqueeze(0), exp_embeddings[1:])
        cosine_similarities_skill = F.cosine_similarity(source_embedding.unsqueeze(0), skill_embeddings[1:])
        cosine_similarities_edu = F.cosine_similarity(source_embedding.unsqueeze(0), edu_embeddings[1:])
        cosine_similarities_loc = F.cosine_similarity(source_embedding.unsqueeze(0), loc_embeddings[1:])

        # Weighted aggregate: 10% location, 30% skills, 40% education, 20% experience.
        final_score = (
            0.1 * cosine_similarities_loc
            + 0.3 * cosine_similarities_skill
            + 0.4 * cosine_similarities_edu
            + 0.2 * cosine_similarities_exp
        )

        if final_score > 0.30:
            st.write(name)
            st.write(final_score)

            # Email the shortlisted candidate.
            sender_email = "interviewerai@aafaaqamir.com"
            receiver_email = email
            # SECURITY: the SMTP password was previously hard-coded here and
            # committed — treat it as leaked and rotate it. It is now read from
            # the SMTP_PASSWORD environment variable.
            password = os.environ.get("SMTP_PASSWORD", "")
            subject = "Interview Selection"
            body = f"congratulations {name}! You have been selected for the interview"

            # Build the MIME message.
            message = MIMEMultipart()
            message["From"] = sender_email
            message["To"] = receiver_email
            message["Subject"] = subject
            message.attach(MIMEText(body, "plain"))

            # Connect to the server and send the email; initialize `server`
            # up front so the finally-clause check is explicit (the original
            # probed locals(), which is fragile).
            server = None
            try:
                # Hostinger SMTP on port 587 with STARTTLS (use 465 for implicit SSL).
                server = smtplib.SMTP("smtp.hostinger.com", 587)
                server.set_debuglevel(1)  # Enable SMTP debug output
                server.starttls()
                server.login(sender_email, password)
                server.sendmail(sender_email, receiver_email, message.as_string())
                print("Email sent successfully!")
            except Exception as e:
                # Best-effort notification: report the failure, keep scoring
                # the remaining resumes.
                print(f"Error: {e}")
            finally:
                if server is not None:
                    server.quit()
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
|