Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| import google.generativeai as genai | |
| import PyPDF2 as pdf | |
| import io | |
| import re | |
| import streamlit as st | |
| from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import torch | |
| # Set API key for Google API (Make sure it's securely set in your environment variables) | |
# The Gemini key must be supplied through the environment; abort start-up
# with a clear message when it is missing.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")

# Configure the Google generative AI client with that key.
genai.configure(api_key=api_key)

# Token-classification (NER) pipelines used by the skill / education extractors.
skill_extractor = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
education_extractor = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Sequence-classification model and tokenizer used by analyze_resume().
# NOTE(review): "roberta-base" looks like a base checkpoint, so the
# classification head is presumably untrained -- confirm whether a fine-tuned
# checkpoint was intended.
task = "sentiment-analysis"
model_name = "roberta-base"  # Using RoBERTa
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
| # Extract text from uploaded PDF file | |
def input_pdf_text(uploaded_file):
    """Return the text of every page of an uploaded PDF, concatenated.

    Args:
        uploaded_file: File-like object whose ``read()`` returns the raw PDF
            bytes (the Streamlit upload in this app).

    Returns:
        str: The page texts joined in page order; ``""`` when no page
        yields any text.
    """
    file_stream = io.BytesIO(uploaded_file.read())
    reader = pdf.PdfReader(file_stream)
    # A page without extractable text (e.g. a scanned image) may yield None
    # on some PyPDF2 versions; treat that as empty text instead of failing
    # on ``str + None``.
    return "".join(page.extract_text() or "" for page in reader.pages)
| # Extract candidate name directly from the model response | |
def extract_name_from_model_response(response_text):
    """Pull the candidate name out of the model's free-text answer.

    Looks for a ``Candidate Name:`` label and returns the rest of that line.
    Markdown emphasis around the label or the value (for example
    ``**Candidate Name:** Jane Roe``) is tolerated and stripped.

    Args:
        response_text: Text returned by the language model.

    Returns:
        str: The cleaned name, or ``"Not Available"`` when the label is
        missing or nothing but whitespace/markup follows it.
    """
    # ``\**`` lets the label be written as ``**Candidate Name**:``.
    match = re.search(r"Candidate Name\**\s*:\s*(.*)", response_text)
    if match is None:
        return "Not Available"
    # Drop surrounding whitespace and markdown emphasis characters.
    name = match.group(1).strip(" \t\r\n*_`")
    return name or "Not Available"
| # Extract email and phone numbers using regex | |
def extract_contact_info(resume_text):
    """Find the first e-mail address and the first phone number in a resume.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        tuple[str, str]: ``(email, contact)``; each element is the first
        regex match in the text, or ``"Not Available"`` when nothing matches.
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    phone_pattern = r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"

    found = []
    for pattern in (email_pattern, phone_pattern):
        hit = re.search(pattern, resume_text)
        found.append("Not Available" if hit is None else hit.group(0))

    email, contact = found
    return email, contact
| # Extract skills using NER model | |
def extract_skills(resume_text):
    """Collect the entities that the skill NER pipeline labels as ``SKILL``.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        str: The matching entity words joined with ``", "``, or
        ``"Not Available"`` when no entity carries the ``SKILL`` group.
    """
    # NOTE(review): the configured checkpoint looks like a CoNLL-style NER
    # model; confirm it can actually emit a 'SKILL' entity group.
    entities = skill_extractor(resume_text)
    skill_words = []
    for item in entities:
        if item['entity_group'] == 'SKILL':
            skill_words.append(item['word'])
    if not skill_words:
        return "Not Available"
    return ", ".join(skill_words)
| # Extract education information using NER model | |
def extract_education(resume_text):
    """Extract education details via NER, falling back to keyword patterns.

    The NER pipeline is tried first; entities whose group is ``EDUCATION``
    are returned as-is. When there are none, a small set of regular
    expressions (degree names, fields of study, "University of ...") is
    applied to the raw text instead.

    Args:
        resume_text: Plain text of the resume.

    Returns:
        str: Comma-separated education strings, or ``"Not Available"`` when
        neither the NER pipeline nor the patterns find anything.
    """
    # NOTE(review): confirm the configured checkpoint can emit an
    # 'EDUCATION' entity group; otherwise only the regex fallback is used.
    ner_results = education_extractor(resume_text)
    education_entities = [
        entity['word']
        for entity in ner_results
        if entity['entity_group'] == 'EDUCATION'
    ]
    if education_entities:
        return ", ".join(education_entities)

    edu_patterns = [
        # Dots in the abbreviations are escaped so that "B.A" no longer
        # matches arbitrary three-letter runs such as "BIA" or "MEA".
        r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B\.A|M\.A|B\.Tech|M\.Tech|Doctorate|Engineering|Computer Science|Information Technology|Data Science)",
        r"(University of [A-Za-z]+.*)",
    ]
    education = []
    for pattern in edu_patterns:
        education.extend(hit.strip() for hit in re.findall(pattern, resume_text))

    # Keep the first occurrence of each match so repeated keywords (for
    # example "Engineering" on every line) are listed once, in order.
    unique_education = [item for item in dict.fromkeys(education) if item]
    return ", ".join(unique_education) if unique_education else "Not Available"
| # Extract team leadership and management years from the resume | |
def extract_experience_years(text, current_year=None):
    """Estimate the total years of experience mentioned in free text.

    Three kinds of mention are recognised and summed:

    * year ranges such as ``2015-2020``, ``2015 to 2020`` or
      ``2019–present`` (an open end counts up to ``current_year``);
    * ``since 2018`` (counts up to ``current_year``);
    * explicit durations such as ``5 years`` or ``10+ years``.

    Only four-digit years starting with 19 or 20 are treated as years, so
    digit pairs like phone numbers are not mistaken for ranges. The result
    is a heuristic sum: overlapping mentions are not reconciled.

    Args:
        text: Text to scan; empty or ``None`` yields 0.
        current_year: Year used for open-ended ranges. Defaults to the
            current calendar year.

    Returns:
        int: Sum of the years found (0 when nothing matches).
    """
    import datetime  # local import: keeps this block self-contained

    if not text:
        return 0
    if current_year is None:
        current_year = datetime.date.today().year

    year = r"(?:19|20)\d{2}"
    range_re = re.compile(
        rf"\b({year})\s*(?:-|–|—|to)\s*({year}|present|current|now)\b",
        re.IGNORECASE,
    )
    since_re = re.compile(rf"\bsince\s+({year})\b", re.IGNORECASE)
    duration_re = re.compile(r"\b(\d+)\+?\s*years?\b", re.IGNORECASE)

    total = 0

    # Each range is matched by exactly one pattern, so it is counted once.
    for start, end in range_re.findall(text):
        end_year = int(end) if end.isdigit() else current_year
        # Ignore reversed ranges instead of subtracting from the total.
        total += max(0, end_year - int(start))

    # With a single capture group findall() yields plain strings.
    for start in since_re.findall(text):
        total += max(0, current_year - int(start))

    for amount in duration_re.findall(text):
        total += int(amount)

    return total
| # Calculate the match percentage using TF-IDF and cosine similarity | |
def calculate_match_percentage(resume_text, job_description):
    """Score how closely a resume matches a job description.

    Both texts are vectorised together with TF-IDF (English stop words
    removed) and compared with cosine similarity.

    Args:
        resume_text: Plain text of the resume.
        job_description: Plain text of the job description.

    Returns:
        The cosine similarity expressed as a percentage, rounded to two
        decimal places.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    vectors = vectorizer.fit_transform([resume_text, job_description])
    # Row 0 is the resume, row 1 the job description.
    resume_vec = vectors[0:1]
    job_vec = vectors[1:2]
    similarity = cosine_similarity(resume_vec, job_vec)[0][0]
    return round(similarity * 100, 2)
| # Generate the detailed analysis from the Gemini model | |
def get_gemini_response(input_text, job_description):
    """Ask Gemini to analyse a resume against a job description.

    The prompt is assembled from explicit lines so its whitespace does not
    depend on source indentation. It also asks the model to open with a
    plain ``Candidate Name: <name>`` line, which is the label the name
    parser in this file searches for.

    Args:
        input_text: Plain text of the resume.
        job_description: Plain text of the job description.

    Returns:
        str: The model's answer with surrounding whitespace removed.
    """
    prompt_lines = [
        "Act as an Applicant Tracking System. Analyze the resume with respect to the job description.",
        f"Candidate Details: {input_text}",
        f"Job Description: {job_description}",
        "Please extract the following:",
        "1. Candidate Name",
        "2. Relevant Skills",
        "3. Educational Background",
        "4. Direct Team Leadership Experience (in years)",
        "5. Direct Management Experience (in years)",
        "6. Match percentage with the job description",
        "7. Provide a resume summary in 5 bullet points highlighting the candidate's qualifications.",
        # Pin the output shape that the downstream name extraction expects.
        'Begin your answer with a line of the exact form "Candidate Name: <full name>" in plain text (no markdown).',
    ]
    prompt = "\n".join(prompt_lines)

    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    # NOTE(review): ``response.text`` presumably raises when the API returns
    # no text candidate (e.g. a blocked prompt) -- confirm and decide how the
    # UI should surface that case.
    return response.text.strip()
| # Extract a detailed resume summary (focusing on leadership roles and team management experience) | |
def extract_leadership_summary(response_text):
    """Build a numbered summary of leadership-related lines from the answer.

    A line is kept when, case-insensitively, it contains "leadership",
    "management" or "team". At most the last five such lines are listed.

    Args:
        response_text: Text returned by the language model.

    Returns:
        str: A fixed header line followed by one ``"<n>. <line>"`` entry
        per kept line, each terminated by a newline.
    """
    keywords = ("leadership", "management", "team")

    relevant = []
    for raw_line in response_text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue  # skip blank lines
        lowered = line.lower()
        if any(word in lowered for word in keywords):
            relevant.append(line)

    # Only the five most recent matches are reported.
    if len(relevant) >= 5:
        relevant = relevant[-5:]

    numbered = [f"{idx}. {bullet}\n" for idx, bullet in enumerate(relevant, 1)]
    header = "Resume Summary: Leadership and Team Management Experience (in years)\n"
    return header + "".join(numbered)
| # Analyze the resume using Hugging Face RoBERTa | |
def analyze_resume(resume_text):
    """Classify a resume against four fixed statements with the RoBERTa model.

    Each statement is prefixed to the resume text, tokenised with
    truncation, and run through the module-level sequence-classification
    model; a predicted class of 1 is treated as "criterion met".

    Args:
        resume_text: Plain text of the resume.

    Returns:
        tuple[dict[str, bool], bool]: ``(analysis, is_suitable)`` where
        ``analysis`` maps ``managerial_responsibilities``,
        ``leadership_skills``, ``work_experience`` and
        ``relevant_experience`` to booleans, and ``is_suitable`` is True
        only when all four are True.
    """
    prompts = [
        f"This resume shows strong managerial responsibilities: {resume_text}",
        f"This resume demonstrates excellent leadership skills: {resume_text}",
        f"This resume indicates significant work experience: {resume_text}",
        f"This resume indicates at least 2 years of relevant experience: {resume_text}",
    ]

    results = []
    # Pure inference: disable autograd so no gradient graph is built or
    # kept alive for each forward pass. Predictions are unchanged.
    with torch.no_grad():
        for prompt in prompts:
            # Truncation keeps over-long resumes within the model's input limit.
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
            outputs = model(**inputs)
            results.append(torch.argmax(outputs.logits).item())

    # NOTE(review): class index 1 is assumed to mean "positive"; confirm
    # against the label mapping of the checkpoint actually loaded.
    analysis = {
        "managerial_responsibilities": results[0] == 1,
        "leadership_skills": results[1] == 1,
        "work_experience": results[2] == 1,
        "relevant_experience": results[3] == 1,
    }

    is_suitable = all(analysis.values())
    return analysis, is_suitable
| # Streamlit interface to upload files and provide job description | |
# ---- Streamlit page: collect the inputs, run the analysis, show results ----
st.title("Resume ATS Analysis Tool")
st.markdown("### Upload Resume and Job Description for Analysis")

# Inputs: the resume PDF and the job description text.
uploaded_file = st.file_uploader("Upload Resume PDF", type=["pdf"])
job_description = st.text_area("Job Description", height=200)

if not (uploaded_file and job_description):
    # Both inputs are required before the Analyze button is offered.
    st.write("Please upload a resume and provide a job description to analyze.")
else:
    analyze_button = st.button("Analyze")
    if analyze_button:
        # Gather every value first, in the same order as the pipeline runs.
        resume_text = input_pdf_text(uploaded_file)
        response_text = get_gemini_response(resume_text, job_description)
        name = extract_name_from_model_response(response_text)
        email, contact = extract_contact_info(resume_text)
        match_percentage = calculate_match_percentage(resume_text, job_description)

        # Bucket the numeric match into a coarse High / Medium / Low label.
        if match_percentage >= 80:
            job_description_match_score = "High"
        elif match_percentage >= 60:
            job_description_match_score = "Medium"
        else:
            job_description_match_score = "Low"

        leadership_summary = extract_leadership_summary(response_text)
        analysis, is_suitable = analyze_resume(resume_text)

        # One row of results; key order defines the column order of the table.
        data = {
            'Candidate_Name': name,
            'Email': email,
            'Contact': contact,
            'Match_Percentage': match_percentage,
            'Job_Description_Match_Score': job_description_match_score,
            'Leadership_and_Team_Management_Summary': leadership_summary,
            'Managerial_Responsibilities': analysis['managerial_responsibilities'],
            'Leadership_Skills': analysis['leadership_skills'],
            'Work_Experience': analysis['work_experience'],
            'Relevant_Experience': analysis['relevant_experience'],
            'Suitable_for_Role': is_suitable,
        }

        # Show the row as a table and offer it as a CSV download.
        df = pd.DataFrame([data])
        st.write(df)

        csv_payload = df.to_csv(index=False)
        st.download_button(
            label="Download Results as CSV",
            data=csv_payload,
            file_name='resume_analysis_results.csv',
            mime='text/csv',
        )