Spaces:

gopichandra
/

LIC_PROFILE_MATCHER

Runtime error

App Files Files Community

gopichandra committed on Jun 20, 2025

Commit

0fb32c3

verified ·

1 Parent(s): fac00d6

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -57

app.py CHANGED Viewed

@@ -1,70 +1,63 @@
 from transformers import AutoTokenizer, AutoModel
-import pdfplumber
 import torch
-from sklearn.metrics.pairwise import cosine_similarity
-import re
-# Load the Hugging Face MiniLM model for sentence embeddings
-model_name = "sentence-transformers/all-MiniLM-L6-v2"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModel.from_pretrained(model_name)
-# Function to extract text from a PDF resume
-def extract_text_from_pdf(pdf_file):
-    with pdfplumber.open(pdf_file) as pdf:
-        text = ""
-        for page in pdf.pages:
-            text += page.extract_text()
     return text
-# Preprocess the text: lowercasing, removing special characters, and extra spaces
-def preprocess_text(text):
-    text = text.lower()  # Convert to lowercase
-    text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
-    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
     return text
-# Function to get embeddings from the text using MiniLM model
-def get_embeddings(text):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
-        outputs = model(**inputs)
-    embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean of all token embeddings
-    return embeddings
-# Calculate cosine similarity between job description and resume
-def calculate_similarity(job_desc, resume):
-    job_embeddings = get_embeddings(job_desc)
-    resume_embeddings = get_embeddings(resume)
-    similarity = cosine_similarity(job_embeddings, resume_embeddings)
-    return similarity[0][0]
-# Main function to match LIC profile with job description
-def lic_profile_matcher(job_description, resume_pdf):
-    # Extract text from PDF resume
-    resume_text = extract_text_from_pdf(resume_pdf)
-    # Preprocess the text (clean and standardize)
-    processed_resume = preprocess_text(resume_text)
-    # Calculate similarity score between job description and resume
-    similarity_score = calculate_similarity(job_description, processed_resume)
-    # Define the threshold for matching
-    if similarity_score > 0.7:
-        return f"Candidate is a good fit with a similarity score of {similarity_score:.2f}."
     else:
-        return f"Candidate is not a good fit with a similarity score of {similarity_score:.2f}."
-# Example job description for LIC role
-job_description = """
-We are looking for a motivated sales agent with experience in selling life insurance products.
-Experience in customer service, understanding of insurance policies, and excellent communication skills are required.
-"""
-# Resume PDF (path to the uploaded PDF file)
-resume_pdf = "path/to/your/resume.pdf"  # Replace with the actual path to your PDF resume
-# Use the LIC Profile Matcher function
-result = lic_profile_matcher(job_description, resume_pdf)
-print(result)

+from flask import Flask, request, jsonify
+import docx
+import fitz  # PyMuPDF for PDF extraction
 from transformers import AutoTokenizer, AutoModel
 import torch
+import os
+app = Flask(__name__)
+# Load the Hugging Face tokenizer and model once at import time; get_similarity_score() reuses these module-level objects on every call
+tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_path):
+    doc = fitz.open(pdf_path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
     return text
+# Function to extract text from DOCX
+def extract_text_from_docx(docx_path):
+    doc = docx.Document(docx_path)
+    text = ""
+    for para in doc.paragraphs:
+        text += para.text + "\n"
     return text
+# Function to calculate semantic similarity score
+def get_similarity_score(text1, text2):
+    inputs = tokenizer([text1, text2], padding=True, truncation=True, return_tensors='pt')
     with torch.no_grad():
+        embeddings = model(**inputs)
+        sentence_embeddings = embeddings.last_hidden_state.mean(dim=1)
+    similarity_score = torch.nn.functional.cosine_similarity(sentence_embeddings[0], sentence_embeddings[1], dim=0)
+    return similarity_score.item()
+# API endpoint to process the resume and calculate similarity with LIC profile
+@app.route('/score_resume', methods=['POST'])
+def score_resume():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file part"}), 400
+    file = request.files['file']
+    lic_profile = request.form.get('lic_profile', '')  # LIC profile text to compare against
+    if file.filename.endswith('.pdf'):
+        resume_text = extract_text_from_pdf(file)
+    elif file.filename.endswith('.docx'):
+        resume_text = extract_text_from_docx(file)
     else:
+        return jsonify({"error": "Invalid file type. Please upload a PDF or DOCX file."}), 400
+    if not lic_profile:
+        return jsonify({"error": "LIC profile text is required."}), 400
+    # Calculate the similarity score between resume and LIC profile
+    score = get_similarity_score(resume_text, lic_profile)
+    return jsonify({"similarity_score": score})
+if __name__ == '__main__':
+    app.run(debug=True)