gopichandra committed on
Commit
55671b0
·
verified ·
1 Parent(s): 47a52fd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ import pdfplumber
3
+ import torch
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import re
6
+
7
# Checkpoint identifier shared by the tokenizer and the encoder so the two
# always come from the same pretrained weights/vocabulary.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Loaded once at import time; get_embeddings() reuses these module-level
# objects on every call instead of reloading them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
11
+
12
+ # Function to extract text from a PDF resume
13
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts (the example script
            in this file passes a filesystem path).

    Returns:
        The per-page text joined with newlines. Pages for which
        ``extract_text()`` yields nothing contribute an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # `or ""` guards against extract_text() returning None for a page
        # with no extractable text (e.g. a scanned image), which would
        # otherwise raise TypeError during concatenation.
        # Joining with "\n" keeps the last word of one page from fusing
        # with the first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
19
+
20
+ # Preprocess the text: lowercasing, removing special characters, and extra spaces
21
def preprocess_text(text):
    """Normalize text for comparison.

    Lowercases the input, deletes every character that is neither a word
    character nor whitespace, then collapses each run of whitespace to a
    single space and trims both ends.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string (empty if the input had no word characters).
    """
    text = text.lower()
    # Punctuation must be removed BEFORE whitespace is collapsed: deleting a
    # free-standing symbol ("a - b") leaves two adjacent spaces behind, which
    # the collapse step then cleans up.
    text = re.sub(r'[^\w\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()
26
+
27
+ # Function to get embeddings from the text using MiniLM model
28
def get_embeddings(text):
    """Embed text with the module-level MiniLM tokenizer and model.

    Token embeddings from the model's last hidden state are averaged over
    the sequence dimension, weighted by the tokenizer's attention mask so
    that padding positions do not dilute the result.

    Args:
        text: A string, or a batch of strings, to embed. Input longer than
            512 tokens is truncated by the tokenizer call below.

    Returns:
        A torch tensor with one pooled embedding row per input text.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # Expand the mask over the hidden dimension so it can weight each token
    # vector. For a single string the mask is all ones and this reduces to
    # the plain mean over tokens.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # clamp avoids a division by zero should a row contain no real tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts
34
+
35
+ # Calculate cosine similarity between job description and resume
36
def calculate_similarity(job_desc, resume):
    """Score how similar a job description and a resume are.

    Both texts are embedded with get_embeddings() (job description first,
    then resume) and compared with scikit-learn's cosine_similarity.

    Args:
        job_desc: Job description text.
        resume: Resume text.

    Returns:
        The single entry at position [0][0] of the pairwise similarity
        matrix, i.e. the cosine similarity between the two embeddings.
    """
    pairwise = cosine_similarity(get_embeddings(job_desc), get_embeddings(resume))
    return pairwise[0][0]
41
+
42
+ # Main function to match LIC profile with job description
43
def lic_profile_matcher(job_description, resume_pdf, threshold=0.7):
    """Judge whether a PDF resume matches a job description.

    Args:
        job_description: Text of the job description.
        resume_pdf: PDF resume, passed straight to extract_text_from_pdf().
        threshold: Similarity score that must be strictly exceeded for the
            candidate to be reported as a good fit. Defaults to 0.7.

    Returns:
        A human-readable verdict string containing the similarity score
        formatted to two decimal places.
    """
    resume_text = extract_text_from_pdf(resume_pdf)

    # Normalize BOTH sides the same way. Cleaning only the resume would let
    # case/punctuation differences between the two texts leak into the
    # score instead of differences in content.
    processed_resume = preprocess_text(resume_text)
    processed_job_description = preprocess_text(job_description)

    similarity_score = calculate_similarity(processed_job_description, processed_resume)

    if similarity_score > threshold:
        return f"Candidate is a good fit with a similarity score of {similarity_score:.2f}."
    return f"Candidate is not a good fit with a similarity score of {similarity_score:.2f}."
58
+
59
# Sample posting for an LIC sales role; this is the text the resume is scored against.
job_description = """
We are looking for a motivated sales agent with experience in selling life insurance products.
Experience in customer service, understanding of insurance policies, and excellent communication skills are required.
"""

# Placeholder location of the resume to evaluate.
resume_pdf = "path/to/your/resume.pdf"  # Replace with the actual path to your PDF resume

# Run the matcher on the sample inputs and show its verdict.
result = lic_profile_matcher(job_description, resume_pdf)
print(result)