"""Resume analysis tool: scores uploaded resumes against a job description.

Combines TF-IDF keyword similarity, sentence-embedding similarity, spaCy
NER-based skill extraction, and regex-based experience extraction, exposed
as a Gradio web app.
"""

import os
import re

import gradio as gr
import numpy as np
import pandas as pd
import PyPDF2
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the spaCy model for Named Entity Recognition; download it on first run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    os.system("python -m spacy download en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Sentence-embedding model used for resume vs. job-description similarity.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')


def extract_text_from_txt(txt_file):
    """Extract text from a .txt upload.

    Accepts raw ``bytes`` (decoded as UTF-8) or a file-like object with a
    ``read()`` method.
    """
    if isinstance(txt_file, bytes):
        return txt_file.decode('utf-8')
    return txt_file.read()


def extract_text_from_pdf(pdf_file):
    """Extract text from every page of a PDF file.

    Pages where PyPDF2 cannot extract text (``extract_text()`` returns None)
    contribute an empty string instead of raising.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # ''.join avoids quadratic string concatenation on large PDFs.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def extract_years_of_experience(resume_text):
    """Return the largest "N years"/"N yrs" figure in the resume (0 if none)."""
    matches = re.findall(r'(\d+)\s*[-]?[\s]*(years?|yrs?)', resume_text, re.IGNORECASE)
    return max((int(match[0]) for match in matches), default=0)


def extract_skills_nlp(resume_text):
    """Extract unique skill entities from the resume using spaCy NER.

    NOTE(review): the stock ``en_core_web_sm`` model does not emit a "SKILL"
    entity label, so this returns [] unless a custom-trained model is
    loaded — confirm which model is deployed.
    """
    doc = nlp(resume_text)
    return list({ent.text for ent in doc.ents if ent.label_ == "SKILL"})


def evaluate_with_tfidf(resumes, required_skills):
    """Score each resume text against the required-skills string via TF-IDF.

    Returns a 1-D numpy array with one similarity score per resume
    (dot products of the L2-normalized TF-IDF vectors).
    """
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(resumes + [required_skills])
    # The last row is the required-skills vector; compare it to every resume row.
    return (tfidf_matrix[-1] * tfidf_matrix[:-1].T).toarray().flatten()


def cosine_similarity(a, b):
    """Cosine similarity between two vectors; 0.0 when either vector is zero.

    The zero-norm guard prevents a division-by-zero / NaN result for empty
    or degenerate embeddings.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return float(np.dot(a, b) / (norm_a * norm_b))


def analyze_resumes(resume_files, job_desc_file, required_skills, required_experience_years):
    """Analyze resumes against a job description and required criteria.

    Args:
        resume_files: uploaded resume files; only .pdf and .txt are processed,
            other extensions are silently skipped.
        job_desc_file: the job description file (.txt or .pdf).
        required_skills: comma-separated skills string.
        required_experience_years: minimum years of experience required.

    Returns:
        Dict with the job-description text and, per resume, the extracted
        skills, years of experience, TF-IDF similarity to the required skills,
        embedding similarity to the job description, and whether the
        experience requirement is met.
    """
    if job_desc_file.name.endswith('.txt'):
        job_description = extract_text_from_txt(job_desc_file)
    else:
        job_description = extract_text_from_pdf(job_desc_file)

    results = {
        "job_description": job_description,
        "resumes": []
    }
    required_skills_list = [skill.strip().lower() for skill in required_skills.split(",")]

    # Loop-invariant: encode the job description once, not per resume.
    job_desc_embedding = sentence_model.encode(job_description)

    for resume in resume_files:
        if resume.name.endswith('.pdf'):
            resume_text = extract_text_from_pdf(resume)
        elif resume.name.endswith('.txt'):
            resume_text = extract_text_from_txt(resume)
        else:
            continue  # Skip unsupported file types.

        found_skills = extract_skills_nlp(resume_text)
        experience_years = extract_years_of_experience(resume_text)

        # TF-IDF similarity of this resume against the required-skills string.
        tfidf_similarities = evaluate_with_tfidf([resume_text], ", ".join(required_skills_list))

        resume_embedding = sentence_model.encode(resume_text)
        similarity_score = cosine_similarity(resume_embedding, job_desc_embedding)

        results["resumes"].append({
            "resume_text": resume_text,
            "found_skills": found_skills,
            "experience_years": experience_years,
            # Cast numpy scalars to float so gr.JSON can serialize them.
            "tfidf_similarity": float(tfidf_similarities[0]),
            "embedding_similarity": float(similarity_score),
            # Previously required_experience_years was accepted but unused;
            # report whether the requirement is met (None treated as 0).
            "meets_experience_requirement": experience_years >= (required_experience_years or 0),
        })

    return results


def build_app():
    """Build and return the Gradio Blocks UI for the resume analysis tool."""
    with gr.Blocks() as app:
        gr.Markdown("## Resume Analysis Tool")
        with gr.Row():
            job_desc_file = gr.File(label="Job Description File (.txt or .pdf)")
            resume_files = gr.Files(label="Upload Resumes (.txt, .pdf)", file_types=[".txt", ".pdf"])
        required_skills = gr.Textbox(label="Required Skills (comma separated)", placeholder="Python, Machine Learning, etc.")
        required_experience_years = gr.Number(label="Required Experience (in years)", value=5)
        analyze_button = gr.Button("Analyze Resumes")
        output_text = gr.JSON(label="Analysis Results")

        analyze_button.click(
            analyze_resumes,
            inputs=[resume_files, job_desc_file, required_skills, required_experience_years],
            outputs=output_text
        )
    return app


# Launch the app
if __name__ == "__main__":
    app = build_app()
    app.launch(share=True)  # share=True creates a public link