Spaces:
Build error
Build error
| import os | |
| import re | |
| import spacy | |
| from sklearn.feature_extraction.text import CountVectorizer | |
| from src.groq_client import analyze_resume | |
# Module-level spaCy pipeline shared by the functions in this file.
# Prefer the small English model; if spaCy cannot find it (OSError),
# tell the user how to install it and continue with a blank English
# pipeline so the module can still be imported.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print(
        "The spaCy model 'en_core_web_sm' is not installed.",
        "Please install it using: python3 -m spacy download en_core_web_sm",
        sep="\n",
    )
    nlp = spacy.blank("en")
def preprocess_text(text):
    """Normalize raw resume text for analysis.

    Every character that is neither a word character nor whitespace is
    replaced by a space, runs of whitespace are collapsed to a single
    space, surrounding whitespace is trimmed and the result is lowercased.

    Args:
        text (str): Raw text extracted from resume

    Returns:
        str: Preprocessed text
    """
    # Punctuation and symbols become spaces so adjacent words stay separated
    without_symbols = re.sub(r'[^\w\s]', ' ', text)
    # Collapse the whitespace runs created above (and any already present)
    single_spaced = re.sub(r'\s+', ' ', without_symbols)
    return single_spaced.strip().lower()
def extract_keywords(text, job_role):
    """Extract keywords from resume text.

    Combines two sources: tokens the module-level spaCy pipeline tags as
    nouns or proper nouns, and up to 50 unigram/bigram terms selected by a
    CountVectorizer fitted on the text with English stop words removed.

    Args:
        text (str): Preprocessed resume text
        job_role (str): Target job role (not used by the extraction itself;
            kept for interface compatibility)

    Returns:
        list: Unique keywords in first-seen order (part-of-speech keywords
            first, then vectorizer terms). Empty if nothing could be
            extracted.
    """
    # Nouns and proper nouns longer than two characters.
    # NOTE(review): if the blank fallback pipeline is in use, tokens
    # presumably carry no POS tags and this list stays empty - confirm.
    doc = nlp(text)
    pos_keywords = [
        token.text
        for token in doc
        if token.pos_ in ("NOUN", "PROPN") and len(token.text) > 2
    ]

    vectorizer = CountVectorizer(max_features=50, stop_words='english', ngram_range=(1, 2))
    try:
        vectorizer.fit([text])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the text
        # is empty or consists only of stop words; treat that as "no terms"
        # instead of aborting the whole analysis.
        common_terms = []
    else:
        common_terms = list(vectorizer.get_feature_names_out())

    # dict.fromkeys removes duplicates while keeping a stable order,
    # unlike list(set(...)) whose order can vary between runs.
    return list(dict.fromkeys(pos_keywords + common_terms))
def analyze_resume_local(resume_text, job_role):
    """Run the local (offline) analysis steps on a resume.

    Keywords are extracted from the preprocessed text, while the format
    and readability scores are computed from the raw text.

    Args:
        resume_text (str): Raw text extracted from resume
        job_role (str): Target job role

    Returns:
        dict: Local analysis results with the keys "local_keywords",
            "local_format_score" and "local_readability_score"
    """
    cleaned_text = preprocess_text(resume_text)
    return {
        # Keyword extraction works on the normalized text
        "local_keywords": extract_keywords(cleaned_text, job_role),
        # Both scores look at the raw text (punctuation and case intact)
        "local_format_score": calculate_format_score(resume_text),
        "local_readability_score": calculate_readability_score(resume_text),
    }
def calculate_format_score(text):
    """Compute a simple format score for a resume.

    Starts from a base of 70, adds 5 points per recognized section name
    found as a whole word (capped at 20) and 1 point per bullet-like
    character (capped at 10).

    Args:
        text (str): Resume text

    Returns:
        int: Format score (0-100)
    """
    lowered = text.lower()

    # Section names are matched case-insensitively as whole words
    section_names = ("experience", "education", "skills", "projects", "certifications", "summary")
    sections_present = sum(
        1 for name in section_names if re.search(r'\b' + name + r'\b', lowered)
    )

    # Every occurrence of these characters counts, wherever it appears
    bullet_total = sum(text.count(marker) for marker in ('•', '·', '-'))

    total = 70 + min(sections_present * 5, 20) + min(bullet_total, 10)
    return min(total, 100)  # Cap at 100
def calculate_readability_score(text):
    """Compute a simple readability score for a resume.

    The text is split into sentences on runs of '.', '!' or '?'. Starting
    from a base of 70, the score drops by 10 when the average sentence
    length exceeds 25 words and rises by 5 when it is below 10 words.
    Text without any sentence keeps the base score.

    Args:
        text (str): Resume text

    Returns:
        int: Readability score (0-100)
    """
    # Split on sentence-ending punctuation and drop empty fragments
    fragments = (piece.strip() for piece in re.split(r'[.!?]+', text))
    sentences = [piece for piece in fragments if piece]

    score = 70
    if sentences:
        word_total = sum(len(sentence.split()) for sentence in sentences)
        mean_length = word_total / len(sentences)
        if mean_length > 25:
            # Penalize very long sentences
            score -= 10
        elif mean_length < 10:
            score += 5

    return max(0, min(score, 100))  # Keep between 0-100
def get_resume_analysis(resume_text, job_role, job_description=None):
    """Analyze a resume locally and through the Groq API.

    Args:
        resume_text (str): Text extracted from resume
        job_role (str): Target job role
        job_description (str, optional): Specific job description for enhanced analysis

    Returns:
        dict: The Groq analysis results plus a "local_keywords" entry
            taken from the local analysis
    """
    # Local analysis runs first, then the Groq API call
    local_analysis = analyze_resume_local(resume_text, job_role)
    remote_analysis = analyze_resume(resume_text, job_role, job_description)

    # Only the locally extracted keywords are merged into the result;
    # they overwrite any "local_keywords" entry in the Groq results.
    return {
        **remote_analysis,
        "local_keywords": local_analysis["local_keywords"],
    }