ResumeAnalyserGroq / src /analyzer.py
damndeepesh
Add application file
6db7601
import os
import re
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from src.groq_client import analyze_resume
def _load_language_model():
    """Load the spaCy pipeline used by this module.

    Tries the "en_core_web_sm" model first. If spaCy raises OSError
    (the model package is not installed), installation instructions are
    printed and a blank English pipeline is returned instead so the
    module can still be imported.

    Returns:
        The loaded spaCy pipeline, or a blank "en" pipeline as fallback.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # Tell the user how to install the missing model.
        print("The spaCy model 'en_core_web_sm' is not installed.")
        print("Please install it using: python3 -m spacy download en_core_web_sm")
        # NOTE(review): a blank pipeline presumably has no POS tagger, so
        # code that filters on token.pos_ may find nothing -- confirm.
        return spacy.blank("en")


# Shared spaCy pipeline, loaded once at import time
nlp = _load_language_model()
def preprocess_text(text):
    """Normalize raw resume text for keyword analysis

    Every character that is neither a word character nor whitespace is
    replaced by a space, runs of whitespace are collapsed to a single
    space, surrounding whitespace is trimmed, and the result is lowercased.

    Args:
        text (str): Raw text extracted from resume

    Returns:
        str: Normalized, lowercase text
    """
    # Punctuation and symbols become spaces so adjacent words stay separated
    without_symbols = re.sub(r'[^\w\s]', ' ', text)
    # Collapse the whitespace runs left behind, then trim both ends
    single_spaced = re.sub(r'\s+', ' ', without_symbols).strip()
    return single_spaced.lower()
def extract_keywords(text, job_role):
    """Extract keywords from resume text

    Two sources are combined: tokens that the module-level spaCy pipeline
    tags as NOUN or PROPN (longer than two characters), and up to 50 of
    the most frequent unigrams/bigrams found by CountVectorizer with
    English stop words removed.

    Args:
        text (str): Preprocessed resume text
        job_role (str): Target job role (currently unused; kept so the
            signature stays stable for callers)

    Returns:
        list: Unique keywords in first-seen order (part-of-speech matches
            first, then frequent terms). Empty if nothing could be extracted.
    """
    # Process the text with spaCy
    doc = nlp(text)

    # Keep nouns and proper nouns longer than two characters
    pos_keywords = [
        token.text
        for token in doc
        if token.pos_ in ("NOUN", "PROPN") and len(token.text) > 2
    ]

    # Use CountVectorizer to get the most common terms
    vectorizer = CountVectorizer(max_features=50, stop_words='english', ngram_range=(1, 2))
    try:
        # Only the learned vocabulary is needed, not the count matrix
        vectorizer.fit([text])
        common_terms = list(vectorizer.get_feature_names_out())
    except ValueError:
        # Raised for an empty vocabulary (empty or stop-word-only text);
        # treat that as "no frequent terms" rather than failing the analysis.
        common_terms = []

    # dict.fromkeys removes duplicates while keeping a deterministic order
    return list(dict.fromkeys(pos_keywords + common_terms))
def analyze_resume_local(resume_text, job_role):
    """Run the local (offline) part of the resume analysis

    Keywords are extracted from the preprocessed text, while the format
    and readability scores are computed from the raw text.

    Args:
        resume_text (str): Raw text extracted from resume
        job_role (str): Target job role

    Returns:
        dict: Results under the keys "local_keywords",
            "local_format_score" and "local_readability_score"
    """
    # Keyword extraction works on the normalized text
    cleaned_text = preprocess_text(resume_text)

    return {
        "local_keywords": extract_keywords(cleaned_text, job_role),
        # Both scores are computed from the original, unprocessed text
        "local_format_score": calculate_format_score(resume_text),
        "local_readability_score": calculate_readability_score(resume_text),
    }
def calculate_format_score(text):
    """Calculate a basic format score for the resume

    Starts from a base of 70, adds 5 points per recognised section header
    word (capped at 20), and adds 1 point per bullet point (capped at 10).

    A bullet point is a '•' or '·' glyph anywhere in the text, or a '-'
    that is the first non-blank character of a line. Hyphens inside words,
    date ranges or phone numbers are not counted as bullets.

    Args:
        text (str): Resume text

    Returns:
        int: Format score (0-100)
    """
    score = 70  # Base score

    # Check for section headers (lowercase once, not once per pattern)
    lowered = text.lower()
    section_patterns = ("experience", "education", "skills", "projects", "certifications", "summary")
    found_sections = sum(
        1 for pattern in section_patterns
        if re.search(r'\b' + pattern + r'\b', lowered)
    )

    # Adjust score based on sections found
    score += min(found_sections * 5, 20)

    # Check for bullet points
    glyph_bullets = text.count('•') + text.count('·')
    # A hyphen counts only when it opens a line (after optional indentation)
    dash_bullets = len(re.findall(r'^[ \t]*-', text, flags=re.MULTILINE))
    score += min(glyph_bullets + dash_bullets, 10)

    return min(score, 100)  # Cap at 100
def calculate_readability_score(text):
    """Estimate readability from the average sentence length

    The text is split on runs of '.', '!' and '?'; blank fragments are
    ignored. Starting from 70, the score loses 10 points when the average
    number of words per sentence exceeds 25 and gains 5 points when it is
    below 10. Text with no sentences keeps the base score.

    Args:
        text (str): Resume text

    Returns:
        int: Readability score (0-100)
    """
    rating = 70  # Base score

    # Sentence fragments, trimmed, with empty ones discarded
    fragments = [piece.strip() for piece in re.split(r'[.!?]+', text)]
    fragments = [piece for piece in fragments if piece]

    if fragments:
        word_total = sum(len(piece.split()) for piece in fragments)
        mean_length = word_total / len(fragments)

        if mean_length > 25:
            # Penalize very long sentences
            rating -= 10
        elif mean_length < 10:
            # Reward short, punchy sentences
            rating += 5

    # Keep between 0-100
    return max(0, min(rating, 100))
def get_resume_analysis(resume_text, job_role, job_description=None):
    """Analyze a resume locally and through the Groq API

    The local analysis runs first, then the Groq analysis. The returned
    dict is the Groq result with a "local_keywords" entry added (replacing
    any entry of the same name in the Groq result).

    Args:
        resume_text (str): Text extracted from resume
        job_role (str): Target job role
        job_description (str, optional): Specific job description for enhanced analysis

    Returns:
        dict: Groq analysis results plus "local_keywords"
    """
    # Local pass first; only its keywords are forwarded to the caller,
    # the local format/readability scores are not part of the result.
    local_keywords = analyze_resume_local(resume_text, job_role)["local_keywords"]

    # Advanced analysis through the Groq API
    groq_results = analyze_resume(resume_text, job_role, job_description)

    return {**groq_results, "local_keywords": local_keywords}