Spaces:

damndeepesh
/

ResumeAnalyserGroq

Build error

damndeepesh

Add application file

6db7601 12 months ago

5.09 kB

	import os
	import re
	import spacy
	from sklearn.feature_extraction.text import CountVectorizer
	from src.groq_client import analyze_resume

	# Load the English spaCy pipeline once, at import time, into the module-level
	# name ``nlp`` that the keyword extraction below relies on.
	try:
	    nlp = spacy.load("en_core_web_sm")
	except OSError:
	    # The packaged model is not installed: print install instructions and
	    # fall back to a blank English pipeline so the module still imports.
	    # NOTE(review): a blank pipeline presumably has no POS tagger, so the
	    # NOUN/PROPN filter in extract_keywords would match nothing -- confirm.
	    for notice in (
	        "The spaCy model 'en_core_web_sm' is not installed.",
	        "Please install it using: python3 -m spacy download en_core_web_sm",
	    ):
	        print(notice)
	    nlp = spacy.blank("en")

	def preprocess_text(text):
	    """Normalize raw resume text into a lowercase, single-spaced string.

	    Args:
	        text (str): Raw text extracted from a resume.

	    Returns:
	        str: The text with every character that is neither a word character
	        nor whitespace replaced by a space, whitespace runs collapsed to one
	        space, leading/trailing whitespace removed, and letters lowercased.
	    """
	    # Punctuation and symbols become spaces (not deleted) so that tokens on
	    # either side of them stay separated.
	    without_symbols = re.sub(r'[^\w\s]', ' ', text)

	    # Squeeze newlines, tabs and repeated blanks down to single spaces.
	    single_spaced = re.sub(r'\s+', ' ', without_symbols)

	    return single_spaced.strip().lower()

	def extract_keywords(text, job_role):
	    """Extract candidate keywords from preprocessed resume text.

	    Two sources are merged: tokens that the module-level spaCy pipeline
	    ``nlp`` tags as NOUN or PROPN (longer than two characters), and the
	    unigram/bigram vocabulary (at most 50 terms, English stop words removed)
	    learned by a ``CountVectorizer`` fitted on this single text.

	    Args:
	        text (str): Preprocessed resume text.
	        job_role (str): Target job role. Currently unused by this function;
	            kept so existing callers do not need to change.

	    Returns:
	        list: Unique keywords in first-seen order (part-of-speech keywords
	        first, then vectorizer terms). Empty when ``text`` is empty,
	        whitespace-only or ``None``.
	    """
	    # Nothing to analyse: return early instead of letting CountVectorizer
	    # fail on a document that yields no vocabulary.
	    if not text or not text.strip():
	        return []

	    # Nouns and proper nouns from the spaCy parse.
	    doc = nlp(text)
	    pos_keywords = [
	        token.text
	        for token in doc
	        if token.pos_ in ("NOUN", "PROPN") and len(token.text) > 2
	    ]

	    # Frequent unigrams/bigrams. Only the learned vocabulary is needed, so
	    # fit() is enough and the count matrix is never built into a local.
	    vectorizer = CountVectorizer(max_features=50, stop_words='english', ngram_range=(1, 2))
	    try:
	        vectorizer.fit([text])
	    except ValueError:
	        # scikit-learn raises ValueError ("empty vocabulary") when every
	        # token is a stop word or too short; treat that as "no terms"
	        # rather than aborting the whole analysis.
	        common_terms = []
	    else:
	        common_terms = list(vectorizer.get_feature_names_out())

	    # dict.fromkeys de-duplicates while keeping a deterministic order,
	    # unlike round-tripping through set().
	    return list(dict.fromkeys(pos_keywords + common_terms))

	def analyze_resume_local(resume_text, job_role):
	    """Run the local heuristics on a resume.

	    Keywords are extracted from the preprocessed text, while the format and
	    readability scores are computed on the raw text as received.

	    Args:
	        resume_text (str): Raw text extracted from resume.
	        job_role (str): Target job role, forwarded to ``extract_keywords``.

	    Returns:
	        dict: Keys ``local_keywords``, ``local_format_score`` and
	        ``local_readability_score``.
	    """
	    # Keyword extraction works on the normalized text only.
	    cleaned_text = preprocess_text(resume_text)

	    # Both scoring helpers deliberately receive the untouched text, since
	    # preprocessing strips the punctuation and bullets they look for.
	    return {
	        "local_keywords": extract_keywords(cleaned_text, job_role),
	        "local_format_score": calculate_format_score(resume_text),
	        "local_readability_score": calculate_readability_score(resume_text),
	    }

	def calculate_format_score(text):
	    """Calculate a basic format score for the resume.

	    The score starts at 70, gains 5 points per recognised section heading
	    word (capped at 20) and 1 point per bullet point (capped at 10).

	    Args:
	        text (str): Resume text, with its original line breaks and bullets.

	    Returns:
	        int: Format score between 70 and 100 inclusive.
	    """
	    score = 70  # Base score

	    # Lowercase once rather than on every loop iteration.
	    lowered = text.lower()

	    # Count which of the usual section names occur as whole words.
	    section_names = ("experience", "education", "skills", "projects", "certifications", "summary")
	    found_sections = sum(
	        1 for name in section_names
	        if re.search(r'\b' + name + r'\b', lowered)
	    )
	    score += min(found_sections * 5, 20)

	    # Dedicated bullet glyphs count wherever they appear.
	    glyph_bullets = text.count('•') + text.count('·')

	    # A hyphen only counts as a bullet when it opens a line and is followed
	    # by a space or tab; hyphens inside words or ranges ("full-time",
	    # "2019-2021") are not list markers and must not inflate the score.
	    dash_bullets = len(re.findall(r'^[ \t]*-[ \t]+', text, flags=re.MULTILINE))

	    score += min(glyph_bullets + dash_bullets, 10)

	    return min(score, 100)  # Cap at 100

	def calculate_readability_score(text):
	    """Estimate readability from the average sentence length.

	    Sentences are the non-empty pieces left after splitting on runs of
	    ``.``, ``!`` or ``?``; words are whitespace-separated tokens.

	    Args:
	        text (str): Resume text.

	    Returns:
	        int: Score clamped to 0-100. Starting from 70, an average above 25
	        words per sentence subtracts 10 and an average below 10 adds 5, so
	        the result is 60, 70 or 75. Text with no sentences scores 70.
	    """
	    stripped_pieces = (piece.strip() for piece in re.split(r'[.!?]+', text))
	    sentences = [piece for piece in stripped_pieces if piece]

	    adjustment = 0
	    if sentences:
	        word_total = sum(len(sentence.split()) for sentence in sentences)
	        average_length = word_total / len(sentences)

	        if average_length > 25:
	            # Long, dense sentences are penalized.
	            adjustment = -10
	        elif average_length < 10:
	            # Short, punchy sentences earn a small bonus.
	            adjustment = 5

	    # Keep the final value inside the 0-100 range.
	    return max(0, min(70 + adjustment, 100))

	def get_resume_analysis(resume_text, job_role, job_description=None):
	    """Analyze a resume and merge local keywords into the Groq results.

	    The local analysis runs first; ``analyze_resume`` (imported from
	    ``src.groq_client``) is then called with the same text and role plus
	    the optional job description.

	    Args:
	        resume_text (str): Text extracted from resume.
	        job_role (str): Target job role.
	        job_description (str, optional): Specific job description, passed
	            through to ``analyze_resume`` unchanged.

	    Returns:
	        dict: Every entry returned by ``analyze_resume`` plus a
	        ``local_keywords`` entry from the local analysis (which replaces any
	        same-named entry in the Groq results). The local format and
	        readability scores are computed but not included.
	    """
	    # Local pass first; only its keyword list is carried forward.
	    local_keywords = analyze_resume_local(resume_text, job_role)["local_keywords"]

	    # Advanced analysis via the Groq client.
	    remote_analysis = analyze_resume(resume_text, job_role, job_description)

	    return {**remote_analysis, "local_keywords": local_keywords}