import os
import re

import spacy
from sklearn.feature_extraction.text import CountVectorizer

from src.groq_client import analyze_resume

# Load the spaCy English model once at import time. If it is missing, fall
# back to a blank English pipeline (tokenizer only, no POS tagging) so the
# module still imports and the rest of the analysis keeps working.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # If model is not installed, provide instructions
    print("The spaCy model 'en_core_web_sm' is not installed.")
    print("Please install it using: python3 -m spacy download en_core_web_sm")
    nlp = spacy.blank("en")
def preprocess_text(text):
    """Normalize raw resume text for downstream keyword analysis.

    Args:
        text (str): Raw text extracted from a resume.

    Returns:
        str: Lowercased text with punctuation replaced by spaces and
            runs of whitespace collapsed to single spaces.
    """
    # Replace every non-word, non-space character with a space so that
    # punctuation never glues two words together.
    no_punct = re.sub(r'[^\w\s]', ' ', text)
    # Collapse whitespace runs and trim the ends.
    collapsed = re.sub(r'\s+', ' ', no_punct).strip()
    return collapsed.lower()
def extract_keywords(text, job_role):
    """Extract candidate keywords from preprocessed resume text.

    Args:
        text (str): Preprocessed resume text.
        job_role (str): Target job role. NOTE(review): currently unused by
            the extraction logic; kept for interface compatibility.

    Returns:
        list: Deduplicated keywords (order not guaranteed).
    """
    # Source 1: nouns and proper nouns longer than two characters,
    # according to the module-level spaCy pipeline.
    parsed = nlp(text)
    pos_keywords = [
        tok.text
        for tok in parsed
        if tok.pos_ in ("NOUN", "PROPN") and len(tok.text) > 2
    ]

    # Source 2: the most frequent uni/bi-grams (English stop words removed)
    # from a bag-of-words fit over the single document.
    vec = CountVectorizer(max_features=50, stop_words='english', ngram_range=(1, 2))
    vec.fit_transform([text])
    frequent_terms = vec.get_feature_names_out()

    # Union of both sources; the set drops duplicates.
    return list(set(pos_keywords) | set(frequent_terms))
def analyze_resume_local(resume_text, job_role):
    """Run the lightweight local analysis pass ahead of the Groq API call.

    Args:
        resume_text (str): Raw text extracted from the resume.
        job_role (str): Target job role.

    Returns:
        dict: Local results under the keys ``local_keywords``,
            ``local_format_score`` and ``local_readability_score``.
    """
    # Keywords come from the cleaned text; the format and readability
    # heuristics inspect the raw text (they rely on punctuation/layout).
    cleaned = preprocess_text(resume_text)
    return {
        "local_keywords": extract_keywords(cleaned, job_role),
        "local_format_score": calculate_format_score(resume_text),
        "local_readability_score": calculate_readability_score(resume_text),
    }
def calculate_format_score(text):
    """Heuristically score the resume's formatting on a 0-100 scale.

    Starts from a base of 70, awards up to 20 points for recognizable
    section headers and up to 10 points for bullet usage, capping at 100.

    Args:
        text (str): Resume text.

    Returns:
        int: Format score in the range [70, 100].
    """
    score = 70  # Base score every resume starts from.

    # Up to 20 points: +5 per standard section header found (whole word,
    # case-insensitive).
    section_patterns = ["experience", "education", "skills", "projects",
                        "certifications", "summary"]
    lowered = text.lower()
    found_sections = sum(
        1 for pattern in section_patterns
        if re.search(r'\b' + pattern + r'\b', lowered)
    )
    score += min(found_sections * 5, 20)

    # Up to 10 points for bullet usage. Count '-' only when it starts a
    # line, so hyphens inside words or date ranges (e.g. "2020-2021") are
    # not mistaken for bullets — the previous version counted every '-'.
    dash_bullets = len(re.findall(r'^\s*-\s', text, flags=re.MULTILINE))
    bullet_count = text.count('•') + text.count('·') + dash_bullets
    score += min(bullet_count, 10)

    return min(score, 100)  # Cap at 100
def calculate_readability_score(text):
    """Estimate readability from average sentence length (0-100 scale).

    Args:
        text (str): Resume text.

    Returns:
        int: 70 base score, +5 when sentences average fewer than 10 words,
            -10 when they average more than 25; clamped to [0, 100].
    """
    score = 70  # Base score.

    # Sentences are the non-empty chunks between terminal punctuation runs.
    raw_chunks = re.split(r'[.!?]+', text)
    sentences = [chunk.strip() for chunk in raw_chunks if chunk.strip()]

    if sentences:
        total_words = sum(len(sentence.split()) for sentence in sentences)
        avg_length = total_words / len(sentences)
        # Long sentences hurt readability; short ones help slightly.
        if avg_length > 25:
            score -= 10
        elif avg_length < 10:
            score += 5

    return min(max(score, 0), 100)  # Clamp to [0, 100].
def get_resume_analysis(resume_text, job_role, job_description=None):
    """Analyze a resume by combining local heuristics with the Groq API.

    Args:
        resume_text (str): Text extracted from the resume.
        job_role (str): Target job role.
        job_description (str, optional): Specific job description for
            enhanced analysis.

    Returns:
        dict: The Groq API results merged with the locally extracted
            keywords under ``local_keywords``.
    """
    # Local pass first, then the remote call.
    local_results = analyze_resume_local(resume_text, job_role)
    groq_results = analyze_resume(resume_text, job_role, job_description)
    # Spread the API payload, then attach the local keywords.
    return {**groq_results, "local_keywords": local_results["local_keywords"]}