Spaces:
Build error
Build error
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer, util | |
| import docx | |
| import os | |
| from PyPDF2 import PdfReader | |
| import re | |
| import requests | |
| import pandas as pd | |
# Maximum number of resumes per screening run.
# NOTE(review): this constant is not referenced anywhere in the visible code.
MAX_RESUMES = 10

# Pre-trained sentence-embedding model used for all similarity scoring.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
def get_google_api_key():
    """Return the Google API key held in the ``GOOGLE_API_KEY`` environment variable.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    key = os.environ.get('GOOGLE_API_KEY')
    if key:
        return key
    raise ValueError("Google API key not found in environment variables.")
def extract_text_from_resume(resume_file):
    """Extract the plain text of a resume stored as .txt, .pdf or .docx.

    Args:
        resume_file: Path to the resume (``str`` or ``os.PathLike``), or an
            upload object that exposes the path through a ``name`` attribute.

    Returns:
        The extracted text. An empty string is returned for unsupported
        extensions so that callers can detect the failure with a simple
        truthiness check, instead of mistaking an error message for resume
        content and scoring it.
    """
    if isinstance(resume_file, (str, os.PathLike)):
        file_path = os.fspath(resume_file)
    else:
        # File-like upload wrappers carry the on-disk path in ``.name``.
        file_path = getattr(resume_file, 'name', resume_file)

    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.txt':
        return read_text_file(file_path)
    if file_extension == '.pdf':
        return read_pdf_file(file_path)
    if file_extension == '.docx':
        return read_docx_file(file_path)
    # Unsupported format: signal "no text" rather than returning a message.
    return ""
def read_text_file(file_path):
    """Return the contents of a plain-text file, decoded as UTF-8.

    The encoding is fixed explicitly so the result does not depend on the
    platform's locale, and undecodable bytes are replaced with U+FFFD so a
    single stray byte does not abort the whole screening run.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's full text.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()
def read_pdf_file(file_path):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted page texts joined with newlines. Pages that yield no
        text contribute an empty string.
    """
    reader = PdfReader(file_path)
    # ``or ""`` guards against a page yielding no text (None or empty), which
    # would otherwise break string concatenation. The newline separator keeps
    # the last word of one page from fusing with the first word of the next.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def read_docx_file(file_path):
    """Return the text of all paragraphs in a .docx document.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The paragraph texts joined with newlines, so that adjacent paragraphs
        (for example a name line followed by an email line) do not run
        together into one token.
    """
    doc = docx.Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
def system_prompt_to_extract_info(resume_text):
    """Build the prompt asking for a candidate's name, email and contact number.

    Args:
        resume_text: Resume content to embed in the prompt.

    Returns:
        The prompt text: the instructions, the resume text, and the expected
        answer format, each on its own line.
    """
    prompt_lines = [
        "",
        "Extract the following information from the resume:",
        "1. Candidate's Full Name",
        "2. Candidate's Email Address",
        "3. Candidate's Contact Number",
        f"Resume Text: {resume_text}",
        "Return the results in the following format:",
        "- Name: [Extracted Name]",
        "- Email: [Extracted Email]",
        "- Contact: [Extracted Contact Number]",
        "",
    ]
    return "\n".join(prompt_lines)
def extract_entities_via_gemini(resume_text):
    """Ask the remote entity-analysis API for the candidate's name, email and phone.

    Args:
        resume_text: Plain text of the resume to analyse.

    Returns:
        On success, a dict with the keys ``name``, ``email`` and ``contact``;
        fields the API did not report keep their placeholder values.
        On any request or response failure, a dict with the keys ``error``,
        ``status_code`` and ``response`` describing what went wrong
        (``status_code`` is ``None`` when no HTTP response was received).

    Raises:
        ValueError: Propagated from ``get_google_api_key`` when no API key is
            configured.
    """
    api_key = get_google_api_key()  # Fetch the API key from environment variables
    # Placeholder API endpoint (adjust as necessary)
    endpoint = "https://gemini.googleapis.com/v1/documents:analyzeEntities"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    document = {
        "document": {
            "type": "PLAIN_TEXT",
            "content": resume_text
        }
    }

    # Without a timeout a stalled connection would block the request forever;
    # connection-level failures are reported in the same shape as HTTP errors.
    try:
        response = requests.post(endpoint, headers=headers, json=document, timeout=30)
    except requests.RequestException as exc:
        return {"error": "Failed to extract entities from resume", "status_code": None, "response": str(exc)}

    if response.status_code != 200:
        return {"error": "Failed to extract entities from resume", "status_code": response.status_code, "response": response.text}

    # A 200 response may still carry a body that is not a JSON object.
    try:
        entities = response.json().get('entities') or []
    except (ValueError, AttributeError):
        return {"error": "Failed to extract entities from resume", "status_code": response.status_code, "response": response.text}

    extracted_info = {"name": "Unknown Candidate", "email": "No Email", "contact": "No Contact"}
    field_by_entity_type = {'PERSON': 'name', 'EMAIL': 'email', 'PHONE_NUMBER': 'contact'}
    for entity in entities:
        if not isinstance(entity, dict):
            continue
        # .get() tolerates entities that lack a 'type' or 'name' key.
        field = field_by_entity_type.get(entity.get('type'))
        value = entity.get('name')
        if field and value:
            # As before, a later entity of the same type overrides an earlier one.
            extracted_info[field] = value
    return extracted_info
def check_similarity(job_description, resume_files):
    """Score each resume against the job description and collect candidate details.

    Args:
        job_description: The job description, given either as the text itself
            or as the path of (or upload object for) a file containing it.
        resume_files: Iterable of resume paths, or upload objects exposing the
            path through a ``name`` attribute. ``None`` means no resumes.

    Returns:
        A list with one 7-tuple per resume:
        ``(resume file, similarity %, eligibility, candidate name,
        leadership experience, email, contact)``. The candidate name is
        ``None`` for resumes that are not eligible.

    Raises:
        ValueError: If the job description is missing or yields no text.
    """
    # The upload widget delivers the *path* of the job description file.
    # Embed the file's content; embedding the path string itself would make
    # every similarity score meaningless.
    job_path = getattr(job_description, 'name', job_description)
    if isinstance(job_path, str) and os.path.isfile(job_path):
        job_text = extract_text_from_resume(job_path)
    else:
        job_text = job_description
    if not job_text:
        raise ValueError("Job description is empty or could not be read.")

    results = []
    job_emb = model.encode(job_text, convert_to_tensor=True)

    for resume_file in resume_files or []:
        # Uploads may arrive as plain path strings or as objects with ``.name``.
        resume_name = getattr(resume_file, 'name', resume_file)
        resume_text = extract_text_from_resume(resume_name)
        if not resume_text:
            results.append((resume_name, 0, "Not Eligible", None, "No leadership experience", "No Email", "No Contact"))
            continue

        # Check for similarity between resume and job description
        resume_emb = model.encode(resume_text, convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(job_emb, resume_emb)[0][0].item()
        # Convert similarity score to percentage
        similarity_percentage = similarity_score * 100

        # NOTE(review): extract_leadership_experience is not defined in the
        # visible part of this file; confirm it exists before deploying.
        leadership_experience = extract_leadership_experience(resume_text)
        # Extract name, email, and contact info using Google Gemini API
        contact_info = extract_entities_via_gemini(resume_text)

        # Eligibility threshold on the raw cosine similarity.
        is_eligible = similarity_score >= 0.50
        results.append((
            resume_name,
            similarity_percentage,
            "Eligible" if is_eligible else "Not Eligible",
            # The name is only reported for eligible candidates.
            contact_info.get('name', 'Unknown Candidate') if is_eligible else None,
            leadership_experience,
            contact_info.get('email', 'No Email'),
            contact_info.get('contact', 'No Contact'),
        ))
    return results
# Input widgets: a single job-description upload and a multi-file resume
# upload, both configured with type="filepath".
job_desc_input = gr.File(
    label="Upload Job Description (TXT)",
    type="filepath",
)
resumes_input = gr.Files(
    label="Upload Resumes (TXT, DOCX, PDF)",
    type="filepath",
)

# Output table; the headers follow the order of the values in each result row.
results_output = gr.Dataframe(
    label="Analysis Results",
    headers=[
        "Resume File",
        "Similarity Score (%)",
        "Eligibility",
        "Candidate Name",
        "Leadership Experience",
        "Email",
        "Contact",
    ],
)
def download_results(results):
    """Write screening results to a CSV file and return the file's path.

    Args:
        results: Rows of seven values each, in the column order used below
            (an empty sequence produces a header-only file).

    Returns:
        Path of the newly written CSV file.
    """
    import tempfile  # local import so this block needs no file-level change

    columns = [
        "Resume File",
        "Similarity Score (%)",
        "Eligibility",
        "Candidate Name",
        "Leadership Experience",
        "Email",
        "Contact",
    ]
    df = pd.DataFrame(results, columns=columns)
    # Use a fresh temporary file per call: a fixed "/tmp/results.csv" does not
    # exist on every platform and lets concurrent sessions overwrite each
    # other's download. delete=False keeps the file for the caller to serve.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="results_",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as handle:
        df.to_csv(handle, index=False)
        return handle.name  # Return the file path
# Gradio Interface
def _screen_and_export(job_description, resume_files):
    """Run the screening and write the same rows to a downloadable CSV.

    Returns one value per declared output: the result rows for the table and
    the CSV file path for the download component.
    """
    results = check_similarity(job_description, resume_files)
    rows = [list(row) for row in results]
    return rows, download_results(results)


interface = gr.Interface(
    fn=_screen_and_export,
    inputs=[job_desc_input, resumes_input],
    # gr.File takes no `file=` argument; the CSV path is supplied as the
    # callback's second return value instead.
    outputs=[results_output, gr.File(label="Download CSV")],
    title="HR Assistant - Resume Screening & Leadership Experience",
    description="Upload job description and resumes to screen candidates for managerial and team leadership roles and extract candidate details.",
    allow_flagging="never"
)
interface.launch()