# Hugging Face Spaces app — resume analysis tool (the hosted Space reported a build error).
| import gradio as gr | |
| import PyPDF2 | |
| import re | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sentence_transformers import SentenceTransformer | |
| import spacy | |
| import os | |
# Load the spaCy English model for Named Entity Recognition, downloading it
# on first run if it is not already installed.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # os.system("python -m spacy download ...") may invoke a different
    # interpreter than the one running this script; spaCy's own download
    # API always targets the current environment.
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load the Sentence Transformer model used for embedding-based similarity.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
def extract_text_from_txt(txt_file):
    """Return the text content of a .txt upload as a ``str``.

    Accepts either raw ``bytes`` or a file-like object. The original code
    returned ``.read()`` unchanged, so a file opened in binary mode leaked
    ``bytes`` into downstream regex/NLP code; we now decode that case too.

    Parameters:
        txt_file: raw bytes, or an object with a ``.read()`` method.

    Returns:
        The decoded text (UTF-8 assumed for byte input).
    """
    if isinstance(txt_file, bytes):
        return txt_file.decode('utf-8')
    content = txt_file.read()
    # Binary-mode file objects yield bytes; normalize to str.
    if isinstance(content, bytes):
        return content.decode('utf-8')
    return content
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF file."""
    pages = PyPDF2.PdfReader(pdf_file).pages
    # extract_text() may return None for pages without a text layer;
    # treat those as empty strings.
    return ''.join(page.extract_text() or '' for page in pages)
def extract_years_of_experience(resume_text):
    """Return the largest years-of-experience figure mentioned in the text.

    Scans for patterns like "5 years", "3 yrs", or "10-years" and returns
    the maximum number found, or 0 when no such phrase is present.
    """
    pattern = r'(\d+)\s*[-]?[\s]*(years?|yrs?)'
    hits = re.findall(pattern, resume_text, re.IGNORECASE)
    if not hits:
        return 0
    return max(int(number) for number, _unit in hits)
def extract_skills_nlp(resume_text):
    """Extract unique skill mentions from resume text via spaCy NER.

    NOTE(review): the stock ``en_core_web_sm`` model does not emit a
    "SKILL" entity label, so with that model this returns an empty list;
    a custom-trained model providing the label is assumed — confirm.
    """
    doc = nlp(resume_text)
    unique_skills = {ent.text for ent in doc.ents if ent.label_ == "SKILL"}
    return list(unique_skills)
def evaluate_with_tfidf(resumes, required_skills):
    """Score each resume against the required-skills text using TF-IDF.

    Fits a vectorizer over the resumes plus the skills text, then takes
    the dot product of each resume row with the skills row. Because
    ``TfidfVectorizer`` L2-normalizes rows by default, these dot products
    are cosine similarities.

    Returns a flat array with one similarity score per resume.
    """
    corpus = list(resumes) + [required_skills]
    tfidf = TfidfVectorizer().fit_transform(corpus)
    skills_row = tfidf[-1]
    resume_rows = tfidf[:-1]
    return (skills_row * resume_rows.T).toarray().flatten()
def cosine_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    The original implementation divided each vector by its norm without
    a guard, producing NaN for zero vectors (e.g. an empty resume whose
    embedding is all zeros). Zero-magnitude input now yields 0.0.

    Parameters:
        a, b: 1-D numeric arrays of equal length.

    Returns:
        float in [-1.0, 1.0], or 0.0 when either vector has zero norm.
    """
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return float(np.dot(a, b) / (norm_a * norm_b))
def analyze_resumes(resume_files, job_desc_file, required_skills, required_experience_years):
    """Analyze uploaded resumes against a job description and requirements.

    Parameters:
        resume_files: uploaded resume file objects (.pdf or .txt each).
        job_desc_file: uploaded job-description file (.pdf or .txt).
        required_skills: comma-separated skill names.
        required_experience_years: minimum years of experience required.

    Returns:
        A JSON-serializable dict with the job description text and one
        entry per analyzed resume (skills found, years of experience,
        TF-IDF and embedding similarity scores, and whether the
        experience requirement is met).
    """
    # Read the job description; extension check is now case-insensitive.
    if job_desc_file.name.lower().endswith('.txt'):
        job_description = extract_text_from_txt(job_desc_file)
    else:
        job_description = extract_text_from_pdf(job_desc_file)

    results = {
        "job_description": job_description,
        "resumes": []
    }

    # Split required skills into a normalized list, dropping empty entries
    # produced by stray commas.
    required_skills_list = [
        skill.strip().lower() for skill in required_skills.split(",") if skill.strip()
    ]

    # The job-description embedding does not depend on the resume; compute
    # it once instead of once per resume (it was inside the loop before).
    job_desc_embedding = sentence_model.encode(job_description)

    for resume in resume_files:
        filename = resume.name.lower()
        if filename.endswith('.pdf'):
            resume_text = extract_text_from_pdf(resume)
        elif filename.endswith('.txt'):
            resume_text = extract_text_from_txt(resume)
        else:
            continue  # Skip unsupported file types

        # Analyze resume text for skills and experience.
        found_skills = extract_skills_nlp(resume_text)
        experience_years = extract_years_of_experience(resume_text)

        # TF-IDF similarity between this resume and the required skills.
        tfidf_similarities = evaluate_with_tfidf(
            [resume_text], ", ".join(required_skills_list)
        )

        # Embedding similarity between this resume and the job description.
        resume_embedding = sentence_model.encode(resume_text)
        similarity_score = cosine_similarity(resume_embedding, job_desc_embedding)

        results["resumes"].append({
            "resume_text": resume_text,
            "found_skills": found_skills,
            "experience_years": experience_years,
            # Cast numpy scalars to Python floats so gr.JSON can serialize
            # them (numpy float32 is not JSON-serializable).
            "tfidf_similarity": float(tfidf_similarities[0]),
            "embedding_similarity": float(similarity_score),
            # required_experience_years was previously accepted but ignored.
            "meets_experience_requirement": experience_years >= required_experience_years,
        })
    return results
# Gradio interface
def build_app():
    """Construct and return the Gradio Blocks UI for the resume analyzer."""
    with gr.Blocks() as app:
        gr.Markdown("## Resume Analysis Tool")
        with gr.Row():
            job_desc_input = gr.File(label="Job Description File (.txt or .pdf)")
            resumes_input = gr.Files(
                label="Upload Resumes (.txt, .pdf)",
                file_types=[".txt", ".pdf"],
            )
        skills_input = gr.Textbox(
            label="Required Skills (comma separated)",
            placeholder="Python, Machine Learning, etc.",
        )
        experience_input = gr.Number(label="Required Experience (in years)", value=5)
        run_button = gr.Button("Analyze Resumes")
        results_output = gr.JSON(label="Analysis Results")

        run_button.click(
            analyze_resumes,
            inputs=[resumes_input, job_desc_input, skills_input, experience_input],
            outputs=results_output,
        )
    return app
# Launch the app
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link.
    build_app().launch(share=True)