| | import streamlit as st |
| | import re |
| | import json |
| | from PyPDF2 import PdfReader |
| | from docx import Document |
| | import spacy |
| | from sentence_transformers import SentenceTransformer, util |
| | from transformers import pipeline, AutoTokenizer, T5ForConditionalGeneration |
| | import os |
| | from groq import Groq |
| |
|
| | |
# Streamlit re-executes this script from top to bottom on every widget
# interaction.  Building the API client and loading three models at module
# level therefore repeated all of that work on each rerun.  Each loader below
# is wrapped in ``st.cache_resource`` so it runs once and the resulting object
# is reused (and shared between sessions) afterwards.
# ``show_spinner=False`` keeps the loaders from drawing a spinner, so they do
# not emit any UI element ahead of ``st.set_page_config`` further down.


@st.cache_resource(show_spinner=False)
def _load_groq_client():
    """Create the Groq API client from the ``GROQ_API_KEY`` environment variable.

    Raises:
        KeyError: If ``GROQ_API_KEY`` is not set.
    """
    return Groq(api_key=os.environ["GROQ_API_KEY"])


@st.cache_resource(show_spinner=False)
def _load_spacy_pipeline():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it on first use."""
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # Loading failed: fetch the model package once, then retry.
        from spacy.cli import download

        download("en_core_web_sm")
        return spacy.load("en_core_web_sm")


@st.cache_resource(show_spinner=False)
def _load_similarity_model():
    """Load the sentence-embedding model used for resume/JD similarity."""
    return SentenceTransformer('all-MiniLM-L6-v2')


@st.cache_resource(show_spinner=False)
def _load_question_generator():
    """Build the T5 tokenizer, model and text2text-generation pipeline.

    Returns:
        A ``(tokenizer, model, pipeline)`` tuple.
    """
    t5_tokenizer = AutoTokenizer.from_pretrained("t5-base", use_fast=False)
    t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
    t5_pipeline = pipeline(
        "text2text-generation",
        model=t5_model,
        tokenizer=t5_tokenizer,
        framework="pt",
    )
    return t5_tokenizer, t5_model, t5_pipeline


# Module-level names are unchanged so the rest of the file keeps working.
client = _load_groq_client()
nlp = _load_spacy_pipeline()
similarity_model = _load_similarity_model()
# NOTE(review): nothing in the visible part of this file calls
# `question_generator` (questions are produced through the Groq client).
# Kept for compatibility; consider removing it to save memory - TODO confirm.
tokenizer, model, question_generator = _load_question_generator()
| |
|
def extract_text(file):
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    Args:
        file: A binary file-like object that also has a ``name`` attribute
            (for example the value returned by ``st.file_uploader``).

    Returns:
        The extracted text.  PDF pages and DOCX paragraphs are separated by
        newlines.  An empty string is returned when the file extension is not
        ``.pdf``, ``.docx`` or ``.txt``.
    """
    # Compare the extension case-insensitively so "Resume.PDF" is not
    # silently treated as an unsupported file.
    filename = file.name.lower()

    if filename.endswith('.pdf'):
        reader = PdfReader(file)
        # `or ""` tolerates a page that yields no text (None or empty), which
        # would otherwise break or pollute the join.
        # Pages are joined with newlines because analyze_sections() splits the
        # text on "\n" to find section headers.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if filename.endswith('.docx'):
        doc = Document(file)
        # One paragraph per line: joining with spaces collapsed the whole
        # document into a single line, so no section header could be found.
        return "\n".join(para.text for para in doc.paragraphs)

    if filename.endswith('.txt'):
        # Undecodable bytes become U+FFFD instead of raising
        # UnicodeDecodeError on files that are not valid UTF-8.
        return file.read().decode("utf-8", errors="replace")

    return ""
| |
|
def extract_contact_info(text):
    """Collect phone numbers and e-mail addresses from *text* via regex.

    Returns:
        A dict with the keys ``'phone'`` and ``'email'``.  Each value lists
        every match in order of appearance, joined by ``", "``, or is the
        string ``'Not found'`` when the pattern matched nothing.
    """
    patterns = {
        'phone': r'\+?\d{1,3}[-.\s]?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{4}',
        'email': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
    }

    contact = {}
    for field, pattern in patterns.items():
        matches = re.findall(pattern, text)
        # An empty match list joins to "", which falls through to the default.
        contact[field] = ', '.join(matches) or 'Not found'
    return contact
| |
|
| |
|
def extract_name(text):
    """Return the first PERSON entity spaCy finds in *text*.

    The module-level ``nlp`` pipeline is run over the whole text; the string
    ``"Not found"`` is returned when no entity is labelled ``PERSON``.
    """
    entities = nlp(text).ents
    person_names = (ent.text for ent in entities if ent.label_ == 'PERSON')
    return next(person_names, "Not found")
| |
|
def _match_section_header(line, section_keywords, max_header_words):
    """Return the section whose keyword appears in a short *line*, else None."""
    # Long lines are body text even if they happen to mention a keyword.
    if len(line.split()) > max_header_words:
        return None
    lowered = line.lower()
    for section, keywords in section_keywords.items():
        if any(keyword in lowered for keyword in keywords):
            return section
    return None


def analyze_sections(text, max_header_words=6):
    """Split resume text into sections with a rule-based header scan.

    The text is read line by line.  A line is treated as a section header when
    it has at most ``max_header_words`` words and contains one of the section
    keywords (case-insensitive).  Every other non-empty line is appended to
    the most recently seen section; lines before the first header are ignored.

    Previously any line containing a keyword counted as a header, so a body
    line such as "Completed several online courses in ..." was dropped and
    switched the current section.

    Args:
        text: Resume text with one logical line per ``"\\n"``-separated line.
        max_header_words: Longest line, in whitespace-separated words, that
            may still be recognised as a section header.

    Returns:
        A dict with the keys ``'experience'``, ``'skills'``, ``'education'``
        and ``'certifications'``.  Each value is the section's lines joined by
        newlines, or ``'Not found'`` when the section collected no lines.
    """
    section_keywords = {
        'experience': ['experience', 'work history', 'employment'],
        'skills': ['skills', 'competencies', 'technologies'],
        'education': ['education', 'academic background'],
        'certifications': ['certifications', 'licenses', 'courses'],
    }
    sections = {name: [] for name in section_keywords}
    current_section = None

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        header = _match_section_header(line, section_keywords, max_header_words)
        if header is not None:
            # Header lines only switch the section; they are not stored.
            current_section = header
        elif current_section:
            sections[current_section].append(line)

    return {
        name: '\n'.join(lines) if lines else 'Not found'
        for name, lines in sections.items()
    }
| |
|
def calculate_similarity(resume_text, jd_text):
    """Score how semantically close the resume is to the job description.

    Both texts are embedded in one call to the module-level
    ``similarity_model``; the cosine similarity of the two embeddings is
    returned multiplied by 100.
    """
    resume_vec, jd_vec = similarity_model.encode([resume_text, jd_text])
    cosine = util.pytorch_cos_sim(resume_vec, jd_vec)
    return cosine.item() * 100
| |
|
def generate_interview_questions(resume_text, jd_text):
    """Generate interview questions for a resume / job description via Groq.

    Sends a single user message to the chat-completions endpoint of the
    module-level ``client`` and returns the reply text.

    Args:
        resume_text: Plain text of the candidate's resume.
        jd_text: Plain text of the job description.

    Returns:
        The generated questions as a string with any ``<think>...</think>``
        reasoning block removed, or ``""`` when the reply has no content.
        Errors raised by the API client are not caught here.
    """
    input_text = f"Generate interview questions based on the resume and job description.Here is the resume: {resume_text}\n and here is the Job Description:{jd_text} Give me concise to the point questions only. Not description of resume or Job Description."

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model="deepseek-r1-distill-llama-70b",
    )

    if not response.choices:
        return ""
    content = response.choices[0].message.content or ""

    # DeepSeek-R1 style models can put their chain of thought in the reply,
    # wrapped in <think>...</think>.  Only the final answer should be shown.
    # `\Z` also removes a block that was cut off before its closing tag.
    return re.sub(r"<think>.*?(?:</think>|\Z)", "", content, flags=re.DOTALL).strip()
| |
|
| | |
# Page configuration: browser-tab title and wide layout.
st.set_page_config(page_title="AI Resume Analyzer", layout="wide")

# Page title (the brain emoji was mis-encoded in the title string) and a
# short summary of what the app produces.
st.title("AI-Powered Resume Analyzer 🧠")
st.markdown("""
Upload a candidate's resume and paste the job description to get:
- Candidate profile analysis
- Job requirement matching
- Automated interview questions
""")
| |
|
| | |
# Input area: resume upload in the narrower left column, job description
# text box in the wider right column.
with st.container():
    col1, col2 = st.columns([2, 3])

    # Widgets are created by calling the column objects directly instead of
    # entering each column with a `with` block.
    uploaded_file = col1.file_uploader(
        "Upload Resume (PDF/DOCX/TXT)",
        type=['pdf', 'docx', 'txt'],
        help="Supported formats: PDF, Word, Text",
    )
    jd_input = col2.text_area(
        "Paste Job Description",
        height=200,
        placeholder="Paste the complete job description here...",
    )
| |
|
def _render_candidate_profile(resume_text):
    """Show the candidate's name, contact details and parsed resume sections."""
    st.header("👤 Candidate Profile")
    info_col, summary_col = st.columns([1, 2])

    with info_col:
        st.subheader("Basic Information")
        name = extract_name(resume_text)
        contact = extract_contact_info(resume_text)
        # Two trailing spaces before the newline force a Markdown line break;
        # plain newlines would merge the three fields into one paragraph.
        st.markdown(
            f"**Name:** {name}  \n"
            f"**Phone:** {contact['phone']}  \n"
            f"**Email:** {contact['email']}"
        )

    with summary_col:
        st.subheader("Professional Summary")
        sections = analyze_sections(resume_text)

        # Two rows of two expanders each: (label, key into `sections`).
        expander_rows = (
            (("Work Experience", 'experience'), ("Education", 'education')),
            (("Skills", 'skills'), ("Certifications", 'certifications')),
        )
        for row in expander_rows:
            for column, (label, key) in zip(st.columns(2), row):
                with column:
                    with st.expander(label):
                        st.write(sections[key])


def _render_match_score(resume_text, jd_text):
    """Show the resume / job description similarity as a metric and a bar."""
    st.header("📊 Job Compatibility Analysis")
    match_score = calculate_similarity(resume_text, jd_text)

    score_col, bar_col = st.columns([1, 3])
    with score_col:
        st.metric("Match Percentage", f"{match_score:.1f}%")

    with bar_col:
        # Cosine similarity can be negative, and st.progress rejects values
        # outside [0.0, 1.0], so clamp the fraction before drawing the bar.
        st.progress(min(max(match_score / 100, 0.0), 1.0))
        st.caption("Semantic similarity score between resume content and job description")


def _render_interview_questions(resume_text, jd_text):
    """Show the generated interview questions, or a warning when there are none."""
    st.header("❓ Suggested Interview Questions")
    questions = generate_interview_questions(resume_text, jd_text)

    if questions:
        st.write(questions)
    else:
        st.warning("Could not generate questions. Please try with more detailed inputs.")


# Run the analysis only when the button is clicked.
if st.button("Process Resume"):
    if not (uploaded_file and jd_input):
        st.info("👆 Please upload a resume and enter a job description to begin analysis")
    else:
        resume_text = extract_text(uploaded_file)

        if not resume_text.strip():
            # Previously nothing at all was shown in this case; an extraction
            # result that is only whitespace is treated as "no text" as well.
            st.warning("Could not extract any text from the uploaded resume. Please try a different file.")
        else:
            _render_candidate_profile(resume_text)
            _render_match_score(resume_text, jd_input)
            _render_interview_questions(resume_text, jd_input)
| |
|
| | |
# Footer: horizontal rule followed by attribution links (the heart symbol
# was mis-encoded in the original string).
st.markdown("---")
st.markdown("Built with ♥ using [Streamlit](https://streamlit.io) | [Hugging Face](https://huggingface.co) | [Spacy](https://spacy.io) | FAISS | Groq AI")