gopichandra committed on
Commit
0fb32c3
·
verified ·
1 Parent(s): fac00d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -57
app.py CHANGED
@@ -1,70 +1,63 @@
 
 
 
1
  from transformers import AutoTokenizer, AutoModel
2
- import pdfplumber
3
  import torch
4
- from sklearn.metrics.pairwise import cosine_similarity
5
- import re
6
 
7
- # Load the Hugging Face MiniLM model for sentence embeddings
8
- model_name = "sentence-transformers/all-MiniLM-L6-v2"
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModel.from_pretrained(model_name)
11
 
12
- # Function to extract text from a PDF resume
13
- def extract_text_from_pdf(pdf_file):
14
- with pdfplumber.open(pdf_file) as pdf:
15
- text = ""
16
- for page in pdf.pages:
17
- text += page.extract_text()
 
 
 
 
18
  return text
19
 
20
- # Preprocess the text: lowercasing, removing special characters, and extra spaces
21
- def preprocess_text(text):
22
- text = text.lower() # Convert to lowercase
23
- text = re.sub(r'\s+', ' ', text) # Remove extra spaces
24
- text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
 
25
  return text
26
 
27
- # Function to get embeddings from the text using MiniLM model
28
- def get_embeddings(text):
29
- inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
30
  with torch.no_grad():
31
- outputs = model(**inputs)
32
- embeddings = outputs.last_hidden_state.mean(dim=1) # Mean of all token embeddings
33
- return embeddings
34
-
35
- # Calculate cosine similarity between job description and resume
36
- def calculate_similarity(job_desc, resume):
37
- job_embeddings = get_embeddings(job_desc)
38
- resume_embeddings = get_embeddings(resume)
39
- similarity = cosine_similarity(job_embeddings, resume_embeddings)
40
- return similarity[0][0]
41
-
42
- # Main function to match LIC profile with job description
43
- def lic_profile_matcher(job_description, resume_pdf):
44
- # Extract text from PDF resume
45
- resume_text = extract_text_from_pdf(resume_pdf)
46
-
47
- # Preprocess the text (clean and standardize)
48
- processed_resume = preprocess_text(resume_text)
49
 
50
- # Calculate similarity score between job description and resume
51
- similarity_score = calculate_similarity(job_description, processed_resume)
52
-
53
- # Define the threshold for matching
54
- if similarity_score > 0.7:
55
- return f"Candidate is a good fit with a similarity score of {similarity_score:.2f}."
56
  else:
57
- return f"Candidate is not a good fit with a similarity score of {similarity_score:.2f}."
58
-
59
- # Example job description for LIC role
60
- job_description = """
61
- We are looking for a motivated sales agent with experience in selling life insurance products.
62
- Experience in customer service, understanding of insurance policies, and excellent communication skills are required.
63
- """
64
 
65
- # Resume PDF (path to the uploaded PDF file)
66
- resume_pdf = "path/to/your/resume.pdf" # Replace with the actual path to your PDF resume
 
 
 
 
 
67
 
68
- # Use the LIC Profile Matcher function
69
- result = lic_profile_matcher(job_description, resume_pdf)
70
- print(result)
 
1
+ from flask import Flask, request, jsonify
2
+ import docx
3
+ import fitz # PyMuPDF for PDF extraction
4
  from transformers import AutoTokenizer, AutoModel
 
5
  import torch
6
+ import os
 
7
 
8
# Flask application object; the route decorator further down registers on it.
app = Flask(__name__)

# One checkpoint id feeds both the tokenizer and the encoder so the two can
# never drift apart. Both objects are loaded once at import time and reused
# by every request.
_MODEL_ID = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModel.from_pretrained(_MODEL_ID)
13
+
14
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Either a filesystem path to the PDF, or a binary file-like
            object exposing ``read()`` (for example an uploaded file). The
            name is kept for backward compatibility with existing callers.

    Returns:
        str: The text of all pages concatenated in page order; an empty
        string when the document has no pages.
    """
    if hasattr(pdf_path, "read"):
        # Uploaded/in-memory data is not a path PyMuPDF can open by name, so
        # hand it the raw bytes and state the format explicitly.
        doc = fitz.open(stream=pdf_path.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_path)

    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with doc:
        return "".join(page.get_text() for page in doc)
21
 
22
def extract_text_from_docx(docx_path):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        docx_path: Passed unchanged to ``docx.Document``.

    Returns:
        str: Every paragraph's text followed by a newline, in document
        order; an empty string when the document has no paragraphs.
    """
    document = docx.Document(docx_path)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
29
 
30
def get_similarity_score(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are tokenized as one padded batch, encoded with the
    module-level ``model``, and reduced to one vector each by mean pooling
    over their real tokens.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        float: Cosine similarity of the two pooled embeddings.
    """
    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    # The shorter text is padded up to the longer one. A plain mean over the
    # sequence axis would average those padding positions in and skew its
    # embedding, so weight each position by the attention mask instead.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp guards against division by zero for a row with no real tokens.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    sentence_embeddings = summed / token_counts

    similarity_score = torch.nn.functional.cosine_similarity(
        sentence_embeddings[0], sentence_embeddings[1], dim=0
    )
    return similarity_score.item()
38
+
39
@app.route('/score_resume', methods=['POST'])
def score_resume():
    """Score an uploaded resume against an LIC profile text.

    Expects a multipart POST with:
        file: The resume, as a ``.pdf`` or ``.docx`` upload.
        lic_profile: Form field holding the profile text to compare against.

    Returns:
        A JSON response ``{"similarity_score": <float>}`` on success, or a
        JSON ``{"error": ...}`` body with status 400 when the file part is
        missing, the file type is unsupported, or the profile text is empty.
    """
    import tempfile  # local import: only this handler needs it

    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400
    file = request.files['file']
    lic_profile = request.form.get('lic_profile', '')  # LIC profile text to compare against

    # Pick the extractor from the extension, case-insensitively, and tolerate
    # a missing filename instead of failing on it.
    extractors = {'.pdf': extract_text_from_pdf, '.docx': extract_text_from_docx}
    extension = os.path.splitext(file.filename or '')[1].lower()
    extractor = extractors.get(extension)
    if extractor is None:
        return jsonify({"error": "Invalid file type. Please upload a PDF or DOCX file."}), 400

    if not lic_profile:
        return jsonify({"error": "LIC profile text is required."}), 400

    # The extractors take filesystem paths, so write the upload to a
    # temporary file first. All validation is done above, so nothing is
    # written for a request that is going to be rejected.
    fd, temp_path = tempfile.mkstemp(suffix=extension)
    os.close(fd)
    try:
        file.save(temp_path)
        resume_text = extractor(temp_path)
    finally:
        try:
            os.remove(temp_path)
        except OSError:
            # Cleanup is best-effort (the file may still be held open on
            # some platforms); record it rather than failing the request.
            app.logger.warning("Could not remove temporary upload %s", temp_path)

    # Calculate the similarity score between resume and LIC profile
    score = get_similarity_score(resume_text, lic_profile)

    return jsonify({"similarity_score": score})
61
 
62
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server run arbitrary code. Keep it opt-in through the
    # FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug_enabled)