# (HF Spaces page residue "Spaces: Sleeping" removed — not part of the module source)
"""
Advanced NLP/ML Utilities for SkillSync
This module contains all the intelligent ML features for enhanced resume matching,
scoring, prediction, and recommendations.
"""
import os
import numpy as np
import pandas as pd
# Disable TensorFlow logging to avoid Keras warnings.
# NOTE: these must be set before TensorFlow is first imported (transformers may
# pull it in transitively), which is why they sit above the optional imports below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
# Try to import sentence_transformers with better error handling
try:
    from sentence_transformers import SentenceTransformer, util
    _SENT_TRANSFORMERS_AVAILABLE = True
except Exception as e:
    # sentence_transformers not available — provide safe fallbacks so static analysis
    # won't fail and runtime code can fallback to simpler heuristics.
    print(f"Warning: sentence_transformers not available: {str(e)}")
    SentenceTransformer = None
    _SENT_TRANSFORMERS_AVAILABLE = False

    class _UtilFallback:
        """Minimal stand-in for ``sentence_transformers.util``."""

        # BUG FIX: this must be a @staticmethod. As a plain method, calling
        # util.pytorch_cos_sim(e1, e2) bound the instance to `a`, which made the
        # inner except trigger and silently return 0.0 for every comparison.
        @staticmethod
        def pytorch_cos_sim(a, b):
            """Numpy cosine similarity returning an object exposing .item(),
            mirroring the 1x1 tensor returned by the real pytorch_cos_sim."""
            try:
                a_np = np.array(a)
                b_np = np.array(b)
                if a_np.ndim == 1:
                    a_np = a_np.reshape(1, -1)
                if b_np.ndim == 1:
                    b_np = b_np.reshape(1, -1)
                num = (a_np * b_np).sum(axis=1)
                denom = np.linalg.norm(a_np, axis=1) * np.linalg.norm(b_np, axis=1)
                # Guard against zero-length vectors (division by zero)
                denom = np.where(denom == 0, 1e-8, denom)
                sim = num / denom

                class _Sim:
                    def __init__(self, v):
                        self._v = v

                    def item(self):
                        try:
                            return float(self._v[0])
                        except Exception:
                            return float(self._v)
                return _Sim(sim)
            except Exception:
                class _ZeroSim:
                    def item(self):
                        return 0.0
                return _ZeroSim()
    util = _UtilFallback()
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| try: | |
| import xgboost as xgb | |
| _XGB_AVAILABLE = True | |
| except Exception as e: | |
| print(f"Warning: xgboost not available: {str(e)}") | |
| xgb = None | |
| _XGB_AVAILABLE = False | |
| try: | |
| import joblib | |
| _JOBLIB_AVAILABLE = True | |
| except Exception: | |
| _JOBLIB_AVAILABLE = False | |
| import logging | |
| from collections import Counter | |
| from typing import List, Dict, Tuple | |
| import re | |
try:
    import textstat
    _TEXTSTAT_AVAILABLE = True
except Exception:
    _TEXTSTAT_AVAILABLE = False

    class _TextstatFallback:
        """Minimal stand-in exposing the textstat API surface used in this module."""

        # BUG FIX: this must be a @staticmethod. As a plain method, calling
        # textstat.flesch_reading_ease(text) on the instance below raised
        # TypeError (instance bound to `text`, real text passed as an extra arg).
        @staticmethod
        def flesch_reading_ease(text):
            """
            Lightweight fallback for textstat.flesch_reading_ease using a simple heuristic:
            - estimate sentence count by splitting on punctuation,
            - estimate word count via word tokens,
            - estimate syllables by counting vowel groups per word.
            This provides a rough readability score when textstat is unavailable.
            """
            # Basic sentence and word tokenization
            sentences = re.split(r'[.!?]+', text)
            sentences = [s for s in sentences if s.strip()]
            words = re.findall(r'\w+', text)
            word_count = len(words) or 1
            sentence_count = max(1, len(sentences))
            # Estimate syllables as number of vowel groups per word
            syllables = sum(len(re.findall(r'[aeiouy]+', w.lower())) for w in words) or 1
            asl = word_count / sentence_count  # average sentence length
            asw = syllables / word_count       # average syllables per word
            # Flesch reading ease formula approximation
            score = 206.835 - (1.015 * asl) - (84.6 * asw)
            return score
    textstat = _TextstatFallback()
try:
    from fuzzywuzzy import fuzz
    _FUZZYWUZZY_AVAILABLE = True
except Exception as e:
    print(f"Warning: fuzzywuzzy not available: {str(e)}")
    _FUZZYWUZZY_AVAILABLE = False

    class _FuzzFallback:
        """Minimal stand-in for ``fuzzywuzzy.fuzz``."""

        # BUG FIX: this must be a @staticmethod. As a plain method, every
        # fuzz.ratio(a, b) call raised TypeError (instance bound to `s1`).
        @staticmethod
        def ratio(s1, s2):
            # Crude exact-match fallback: 100 for identical strings, 50 otherwise.
            # (The real fuzz.ratio returns a graded Levenshtein-based similarity.)
            if s1 == s2:
                return 100
            return 50
    fuzz = _FuzzFallback()
# Configure cache directory for models (overridable via the standard
# TRANSFORMERS_CACHE environment variable)
MODELS_CACHE = os.getenv('TRANSFORMERS_CACHE', '/tmp/hf_cache')
# Global model instances (lazy loading) — populated on first use by the
# corresponding get_*() accessors below; None means "not loaded yet / failed".
_semantic_model = None
_sentiment_analyzer = None
_ner_model = None
def get_semantic_model():
    """Return the cached sentence-transformer model, loading it on first use.

    Returns None when loading fails (e.g. sentence_transformers unavailable);
    callers are expected to fall back to simpler heuristics in that case.
    """
    global _semantic_model
    if _semantic_model is not None:
        return _semantic_model
    try:
        _semantic_model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder=MODELS_CACHE)
        logging.info("Semantic model loaded successfully")
    except Exception as exc:
        logging.error(f"Error loading semantic model: {str(exc)}")
        _semantic_model = None
    return _semantic_model
def get_sentiment_analyzer():
    """Return the cached sentiment-analysis pipeline, loading it on first use.

    Returns None when transformers (or a usable backend) is unavailable.
    """
    global _sentiment_analyzer
    if _sentiment_analyzer is not None:
        return _sentiment_analyzer
    try:
        # Prevent transformers from pulling in TensorFlow
        os.environ['TRANSFORMERS_NO_TF'] = '1'
        from transformers import pipeline
        _sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=-1,       # CPU
            framework='pt',  # force the PyTorch backend
        )
        logging.info("Sentiment analyzer loaded successfully")
    except Exception as exc:
        logging.warning(f"Sentiment analyzer not available: {str(exc)}")
        _sentiment_analyzer = None
    return _sentiment_analyzer
def get_ner_model():
    """Return the cached named-entity-recognition pipeline, loading it on first use.

    Returns None when transformers (or a usable backend) is unavailable.
    """
    global _ner_model
    if _ner_model is not None:
        return _ner_model
    try:
        # Prevent transformers from pulling in TensorFlow
        os.environ['TRANSFORMERS_NO_TF'] = '1'
        from transformers import pipeline
        _ner_model = pipeline(
            "ner",
            model="dslim/bert-base-NER",
            aggregation_strategy="simple",
            device=-1,       # CPU
            framework='pt',  # force the PyTorch backend
        )
        logging.info("NER model loaded successfully")
    except Exception as exc:
        logging.warning(f"NER model not available: {str(exc)}")
        _ner_model = None
    return _ner_model
| # ============================================================================ | |
| # 1. SEMANTIC MATCHING ENGINE | |
| # ============================================================================ | |
def semantic_similarity(text1: str, text2: str) -> float:
    """
    Calculate semantic similarity between two texts using sentence transformers.
    Returns a similarity score clamped to [0, 1].
    """
    model = get_semantic_model()
    if model is None:
        # No embedding model available: fall back to Jaccard overlap of tokens.
        tokens_a = set(text1.lower().split())
        tokens_b = set(text2.lower().split())
        if not tokens_a or not tokens_b:
            return 0.0
        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
    try:
        emb_a = model.encode(text1, convert_to_tensor=True)
        emb_b = model.encode(text2, convert_to_tensor=True)
        raw = util.pytorch_cos_sim(emb_a, emb_b).item()
        # Cosine similarity can be slightly negative; clamp into [0, 1]
        return min(1.0, max(0.0, raw))
    except Exception as exc:
        logging.error(f"Error in semantic similarity: {str(exc)}")
        return 0.0
def enhanced_skill_matching(user_skills: List[str], required_skills: List[str]) -> Dict:
    """
    Advanced skill matching using semantic similarity.

    For every required skill, finds the user's closest skill; scores above 0.7
    count as a match. Returns overall score, matched/missing lists, non-exact
    semantic matches, and the percentage of required skills covered.
    """
    if not user_skills or not required_skills:
        return {
            'overall_score': 0.0,
            'matched_skills': [],
            'missing_skills': required_skills,
            'semantic_matches': []
        }
    # Overall semantic similarity between the two joined skill lists
    overall_score = semantic_similarity(' '.join(user_skills), ' '.join(required_skills))
    matched, missing, semantic_matches = [], [], []
    for req_skill in required_skills:
        # Score every user skill against this requirement; max() with an
        # insertion-ordered dict keeps the first maximal candidate, matching
        # a strict "greater than" scan.
        scores = {candidate: semantic_similarity(candidate, req_skill)
                  for candidate in user_skills}
        best_skill = max(scores, key=scores.get)
        best_score = scores[best_skill]
        if best_score > 0.7:  # Strong match threshold
            matched.append(req_skill)
            if best_skill != req_skill:
                semantic_matches.append({
                    'required': req_skill,
                    'user_has': best_skill,
                    'score': round(best_score, 3)
                })
        else:
            missing.append(req_skill)
    return {
        'overall_score': round(overall_score, 3),
        'matched_skills': matched,
        'missing_skills': missing,
        'semantic_matches': semantic_matches,
        'match_percentage': round((len(matched) / len(required_skills)) * 100, 1)
    }
| # ============================================================================ | |
| # 2. INTELLIGENT SKILL EXTRACTION WITH NER | |
| # ============================================================================ | |
# Comprehensive skill keywords database, grouped by domain. All entries are
# lowercase; extract_skills_intelligent() matches them against lowercased text.
TECHNICAL_SKILLS = {
    'programming': ['python', 'java', 'javascript', 'c++', 'c#', 'ruby', 'php', 'swift', 'kotlin',
                    'go', 'rust', 'typescript', 'scala', 'r', 'matlab', 'perl'],
    'web': ['html', 'css', 'react', 'angular', 'vue', 'node.js', 'django', 'flask', 'spring',
            'express', 'fastapi', 'next.js', 'nuxt.js', 'svelte'],
    'database': ['sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch', 'cassandra',
                 'oracle', 'dynamodb', 'firebase'],
    'ml_ai': ['machine learning', 'deep learning', 'tensorflow', 'pytorch', 'keras', 'scikit-learn',
              'nlp', 'computer vision', 'neural networks', 'transformers', 'bert', 'gpt'],
    'cloud': ['aws', 'azure', 'gcp', 'docker', 'kubernetes', 'terraform', 'jenkins', 'ci/cd',
              'microservices', 'serverless'],
    'tools': ['git', 'github', 'gitlab', 'jira', 'confluence', 'slack', 'vscode', 'intellij']
}
# Soft-skill keywords matched the same way (whole-word, case-insensitive)
SOFT_SKILLS = ['leadership', 'communication', 'teamwork', 'problem solving', 'critical thinking',
               'time management', 'adaptability', 'creativity', 'collaboration', 'negotiation',
               'public speaking', 'presentation', 'analytical', 'detail-oriented', 'self-motivated']
def extract_skills_intelligent(text: str) -> Dict[str, List[str]]:
    """
    Extract skills from free text using keyword, fuzzy, and NER matching.

    Returns a dict with 'technical', 'soft', and 'all' lists (deduplicated,
    lowercase). Order is deterministic (first occurrence wins) — previously
    list(set(...)) produced a different order on every run.
    """
    if not text:
        return {'technical': [], 'soft': [], 'all': []}
    text_lower = text.lower()
    # PERF FIX: hoisted out of the loop — this was recomputed for every
    # non-exact-matching skill in the database.
    words = text_lower.split()
    technical_skills = []
    soft_skills = []
    # Keyword matching against the flattened technical-skill database
    all_technical = [skill for category in TECHNICAL_SKILLS.values() for skill in category]
    for skill in all_technical:
        if re.search(r'\b' + re.escape(skill) + r'\b', text_lower):
            technical_skills.append(skill)
        else:
            # Fuzzy match for variations (typos, minor spelling differences)
            for word in words:
                if fuzz.ratio(skill, word) > 85:
                    technical_skills.append(skill)
                    break
    for skill in SOFT_SKILLS:
        if re.search(r'\b' + re.escape(skill) + r'\b', text_lower):
            soft_skills.append(skill)
    # Try NER extraction for additional entities (organizations / misc terms)
    ner_model = get_ner_model()
    if ner_model:
        try:
            entities = ner_model(text[:512])  # limit text length for the model
            for entity in entities:
                if entity['entity_group'] in ['ORG', 'MISC']:
                    word = entity['word'].lower().strip()
                    if len(word) > 2 and word not in technical_skills:
                        technical_skills.append(word)
        except Exception as e:
            logging.warning(f"NER extraction warning: {str(e)}")
    # dict.fromkeys deduplicates while preserving first-seen order
    technical_unique = list(dict.fromkeys(technical_skills))
    soft_unique = list(dict.fromkeys(soft_skills))
    return {
        'technical': technical_unique,
        'soft': soft_unique,
        'all': list(dict.fromkeys(technical_unique + soft_unique))
    }
| # ============================================================================ | |
| # 3. AI-POWERED RESUME SCORING | |
| # ============================================================================ | |
def calculate_resume_score(resume_data: Dict, job_description: str = None) -> Dict:
    """
    Comprehensive resume scoring with multiple dimensions.

    Scores four 0-25 dimensions (completeness, skills depth, experience
    quality, job match) for a 0-100 total, and returns the total, a letter
    grade, the per-dimension breakdown, improvement recommendations, and
    skill counts.
    """
    score_breakdown = {}
    # 1. Completeness Score (0-25 points): fraction of key sections filled
    required_fields = ['skills', 'experience', 'education', 'phone_number', 'email']
    filled_fields = sum(1 for field in required_fields if resume_data.get(field))
    completeness_score = (filled_fields / len(required_fields)) * 25
    score_breakdown['completeness'] = round(completeness_score, 1)
    # 2. Skills Depth Score (0-25 points): technical skills weighted 2x soft skills
    skills_text = resume_data.get('skills', '')
    extracted_skills = extract_skills_intelligent(skills_text)
    technical_count = len(extracted_skills['technical'])
    soft_count = len(extracted_skills['soft'])
    skills_depth = min(25, (technical_count * 2 + soft_count) * 1.5)
    score_breakdown['skills_depth'] = round(skills_depth, 1)
    # 3. Experience Quality Score (0-25 points)
    experience = resume_data.get('experience', '')
    experience_score = 0
    if experience:
        # Check for quantifiable achievements (numbers, percentages) — up to 10 pts
        numbers = re.findall(r'\d+', experience)
        experience_score += min(10, len(numbers) * 2)
        # Check for action verbs — up to 10 pts
        action_verbs = ['developed', 'created', 'managed', 'led', 'implemented', 'designed',
                        'built', 'optimized', 'increased', 'improved']
        found_verbs = sum(1 for verb in action_verbs if verb in experience.lower())
        experience_score += min(10, found_verbs * 2)
        # Length check — 5 pts for a substantive section
        if len(experience) > 100:
            experience_score += 5
    score_breakdown['experience_quality'] = round(experience_score, 1)
    # 4. Job Match Score (0-25 points) - if job description provided
    job_match_score = 0
    if job_description:
        resume_text = ' '.join([str(resume_data.get(field, '')) for field in
                                ['skills', 'experience', 'education', 'certifications']])
        job_match_score = semantic_similarity(resume_text, job_description) * 25
    else:
        # No job description: default to a skills-count-based assessment
        job_match_score = min(25, technical_count * 2)
    score_breakdown['job_match'] = round(job_match_score, 1)
    # Total Score (sum of the four rounded dimensions, max 100)
    total_score = sum(score_breakdown.values())
    # Generate recommendations for any dimension that scored low
    recommendations = []
    if completeness_score < 20:
        missing = [f for f in required_fields if not resume_data.get(f)]
        recommendations.append(f"Complete missing sections: {', '.join(missing)}")
    if skills_depth < 15:
        recommendations.append("Add more technical skills and certifications")
    if experience_score < 15:
        recommendations.append("Use action verbs and quantify achievements (e.g., 'Increased efficiency by 30%')")
    if technical_count < 5:
        recommendations.append("List at least 5-7 technical skills relevant to your field")
    return {
        'total_score': round(total_score, 1),
        'grade': get_grade(total_score),
        'breakdown': score_breakdown,
        'recommendations': recommendations,
        'technical_skills_count': technical_count,
        'soft_skills_count': soft_count
    }
def get_grade(score: float) -> str:
    """Map a 0-100 score to a letter grade with a descriptive label."""
    grade_table = (
        (90, 'A+ (Excellent)'),
        (80, 'A (Very Good)'),
        (70, 'B (Good)'),
        (60, 'C (Fair)'),
    )
    for cutoff, label in grade_table:
        if score >= cutoff:
            return label
    return 'D (Needs Improvement)'
| # ============================================================================ | |
| # 4. INTERVIEW RESPONSE ANALYSIS | |
| # ============================================================================ | |
def analyze_interview_response(question: str, response: str) -> Dict:
    """
    Analyze an interview response using NLP metrics.

    Dimensions: length (up to 20), readability (10 or 15), sentiment (10 or 15),
    STAR structure (up to 20), technical content (up to 20); total capped at 100.
    The `question` argument is currently unused but kept for API stability.

    Returns a dict with 'score', 'grade', pipe-joined 'feedback', and 'metrics'.
    """
    if not response or len(response.strip()) < 10:
        return {
            'score': 0,
            'feedback': 'Response too short. Please provide more detail.',
            'metrics': {}
        }
    metrics = {}
    # 1. Length analysis
    word_count = len(response.split())
    metrics['word_count'] = word_count
    length_score = min(20, (word_count / 10))  # Optimal: 100-200 words
    # 2. Readability (Flesch reading ease; 60-80 reads as clear conversational prose)
    try:
        flesch_score = textstat.flesch_reading_ease(response)
        metrics['readability'] = round(flesch_score, 1)
        readability_score = 15 if 60 <= flesch_score <= 80 else 10
    except Exception:  # BUG FIX: was a bare except (also swallowed KeyboardInterrupt)
        readability_score = 10
    # 3. Sentiment analysis — positive tone is treated as a confidence signal
    sentiment_analyzer = get_sentiment_analyzer()
    sentiment_score = 0
    if sentiment_analyzer:
        try:
            sentiment = sentiment_analyzer(response[:512])[0]  # model input limit
            metrics['sentiment'] = sentiment['label']
            metrics['confidence'] = round(sentiment['score'], 2)
            # Positive sentiment indicates confidence
            sentiment_score = 15 if sentiment['label'] == 'POSITIVE' else 10
        except Exception:  # BUG FIX: was a bare except
            sentiment_score = 10
    else:
        sentiment_score = 10
    # 4. Structure analysis (STAR method for behavioral questions)
    star_keywords = {
        'situation': ['situation', 'context', 'background', 'scenario'],
        'task': ['task', 'challenge', 'problem', 'goal', 'objective'],
        'action': ['action', 'did', 'implemented', 'developed', 'created', 'solved'],
        'result': ['result', 'outcome', 'achieved', 'improved', 'increased', 'success']
    }
    response_lower = response.lower()
    star_found = {key: any(kw in response_lower for kw in keywords)
                  for key, keywords in star_keywords.items()}
    structure_score = sum(star_found.values()) * 5  # 5 points per STAR component
    metrics['star_method'] = star_found
    # 5. Technical content (check for technical terms)
    technical_terms = extract_skills_intelligent(response)
    technical_score = min(20, len(technical_terms['technical']) * 3)
    metrics['technical_terms_found'] = len(technical_terms['technical'])
    # Total score
    total_score = length_score + readability_score + sentiment_score + structure_score + technical_score
    # Generate feedback for weak dimensions
    feedback = []
    if word_count < 50:
        feedback.append("Provide more detailed responses (aim for 100-150 words)")
    if sum(star_found.values()) < 3:
        feedback.append("Use STAR method: Describe Situation, Task, Action, and Result")
    if technical_score < 10:
        feedback.append("Include relevant technical details and specific examples")
    if not feedback:
        feedback.append("Great response! Clear, detailed, and well-structured.")
    return {
        'score': round(min(100, total_score), 1),
        'grade': get_grade(total_score),
        'feedback': ' | '.join(feedback),
        'metrics': metrics
    }
| # ============================================================================ | |
| # 5. PREDICTIVE ANALYTICS FOR INTERNSHIP SUCCESS | |
| # ============================================================================ | |
class InternshipSuccessPredictor:
    """
    ML model to predict internship application success.

    Uses an XGBoost classifier (RandomForest fallback) once train() has been
    called with historical data; before that, a fixed weighted heuristic over
    the extracted feature vector is used.
    """

    def __init__(self):
        self.model = None               # fitted classifier after train() succeeds
        self.scaler = StandardScaler()  # fitted alongside the model
        self.is_trained = False

    def extract_features(self, user_data: Dict, internship_data: Dict) -> np.ndarray:
        """Build the (1, 7) feature vector used for prediction.

        Feature order (indices referenced by the heuristic): 0 semantic skill
        score, 1 skill match fraction, 2 experience match, 3 education level,
        4 certifications score, 5 resume completeness, 6 location match.
        """
        features = []
        # 0-1. Skills match score (semantic) and fraction of required skills matched
        user_skills = [s.strip() for s in user_data.get('skills', '').lower().split(',') if s.strip()]
        required_skills = [s.strip() for s in internship_data.get('skills_required', '').lower().split(',') if s.strip()]
        match_result = enhanced_skill_matching(user_skills, required_skills)
        features.append(match_result['overall_score'])
        features.append(match_result['match_percentage'] / 100)
        # 2. Experience match — full credit when no experience is required.
        # NOTE(review): the experience text is not actually parsed for years yet;
        # this is a placeholder heuristic.
        years_required = internship_data.get('years_of_experience', 0)
        features.append(1.0 if years_required == 0 else 0.5)
        # 3. Education level (0.7 default, 0.8 bachelor's, 1.0 master's/PhD)
        education = user_data.get('education', '').lower()
        edu_score = 0.7
        if 'master' in education or 'phd' in education:
            edu_score = 1.0
        elif 'bachelor' in education or 'b.s' in education or 'b.e' in education:
            edu_score = 0.8
        features.append(edu_score)
        # 4. Certifications count (3 or more counts as a full score)
        certifications = user_data.get('certifications', '')
        cert_count = len([c for c in certifications.split(',') if c.strip()]) if certifications else 0
        features.append(min(1.0, cert_count / 3))
        # 5. Resume completeness
        required_fields = ['skills', 'experience', 'education', 'phone_number', 'email']
        completeness = sum(1 for f in required_fields if user_data.get(f)) / len(required_fields)
        features.append(completeness)
        # 6. Location match (substring containment either way, e.g. same city/state)
        user_location = user_data.get('location', '').lower()
        job_location = internship_data.get('location', '').lower()
        features.append(1.0 if user_location in job_location or job_location in user_location else 0.5)
        return np.array(features).reshape(1, -1)

    def train(self, training_data: pd.DataFrame):
        """Fit the classifier on historical data.

        `training_data` must contain a binary 'success' label column plus the
        feature columns. Returns True on success, False otherwise.
        """
        if len(training_data) < 10:
            logging.warning("Insufficient training data for internship predictor")
            return False
        try:
            X = training_data.drop(['success'], axis=1)
            y = training_data['success']
            # Use XGBoost if available, otherwise RandomForest
            if _XGB_AVAILABLE and xgb:
                self.model = xgb.XGBClassifier(
                    n_estimators=100,
                    max_depth=5,
                    learning_rate=0.1,
                    random_state=42
                )
            else:
                self.model = RandomForestClassifier(
                    n_estimators=100,
                    max_depth=5,
                    random_state=42
                )
            X_scaled = self.scaler.fit_transform(X)
            self.model.fit(X_scaled, y)
            self.is_trained = True
            logging.info("Internship success predictor trained successfully")
            return True
        except Exception as e:
            logging.error(f"Error training predictor: {str(e)}")
            return False

    @staticmethod
    def _heuristic_probability(feature_row: np.ndarray) -> float:
        """Weighted heuristic used when no trained model is available.

        Weights indices 0 (semantic skill score), 1 (skill match fraction) and
        4 (certifications score) of the feature vector.
        """
        return feature_row[0] * 0.4 + feature_row[1] * 0.3 + feature_row[4] * 0.3

    def predict_success_probability(self, user_data: Dict, internship_data: Dict) -> Dict:
        """Predict probability of internship application success.

        Returns a dict with 'success_probability' (percentage), a 'prediction'
        label, a 'confidence' band, and a textual 'recommendation'.
        """
        features = self.extract_features(user_data, internship_data)
        if self.is_trained and self.model:
            try:
                features_scaled = self.scaler.transform(features)
                probability = self.model.predict_proba(features_scaled)[0][1]
                prediction = self.model.predict(features_scaled)[0]
            except Exception:  # BUG FIX: was a bare except
                # Fallback to the heuristic if the fitted model cannot score
                probability = self._heuristic_probability(features[0])
                prediction = 1 if probability > 0.5 else 0
        else:
            # Heuristic-based prediction (no trained model available)
            probability = self._heuristic_probability(features[0])
            prediction = 1 if probability > 0.5 else 0
        # Probabilities near 0 or 1 are confident; mid-range is ambiguous
        confidence = "High" if probability > 0.7 or probability < 0.3 else "Medium"
        return {
            'success_probability': round(probability * 100, 1),
            'prediction': 'Likely' if prediction == 1 else 'Unlikely',
            'confidence': confidence,
            'recommendation': self._generate_recommendation(probability, features[0])
        }

    def _generate_recommendation(self, probability: float, features: np.ndarray) -> str:
        """Generate a personalized recommendation from the predicted probability."""
        if probability > 0.7:
            return "Strong match! Apply with confidence."
        elif probability > 0.5:
            return "Good match. Consider highlighting relevant projects in your application."
        elif probability > 0.3:
            return "Moderate match. Improve skills alignment or consider skill development."
        else:
            return "Skills gap detected. Focus on building required skills before applying."
| # ============================================================================ | |
| # 6. PERSONALIZED LEARNING RECOMMENDATIONS | |
| # ============================================================================ | |
def generate_learning_path(user_skills: List[str], target_skills: List[str],
                           career_goal: str = None) -> Dict:
    """
    Generate personalized learning recommendations for skills the user lacks.

    The `career_goal` argument is currently unused but kept for API stability.
    Returns either a 'complete' status (no gaps) or a learning path with
    difficulty-categorized gaps and up to five prioritized resource lists.
    """
    # BUG FIX: list(set(target) - set(user)) produced a nondeterministic order,
    # so the top-5 recommendations and their priorities varied between runs.
    # Preserve the target_skills order (deduplicated) instead.
    have = set(user_skills)
    missing_skills = [s for s in dict.fromkeys(target_skills) if s not in have]
    if not missing_skills:
        return {
            'status': 'complete',
            'message': 'You have all required skills!',
            'recommendations': []
        }
    # Categorize missing skills by a crude difficulty heuristic on the name
    categorized = {
        'beginner': [],
        'intermediate': [],
        'advanced': []
    }
    for skill in missing_skills:
        skill_lower = skill.lower()
        if any(x in skill_lower for x in ['basic', 'intro', 'fundamental']):
            categorized['beginner'].append(skill)
        elif any(x in skill_lower for x in ['advanced', 'expert', 'architect']):
            categorized['advanced'].append(skill)
        else:
            categorized['intermediate'].append(skill)
    # Generate course recommendations for the top 5 priorities
    recommendations = []
    for skill in missing_skills[:5]:
        courses = {
            'skill': skill,
            'resources': [
                {
                    'platform': 'Coursera',
                    'url': f'https://www.coursera.org/search?query={skill.replace(" ", "+")}',
                    'type': 'Online Course'
                },
                {
                    'platform': 'Udemy',
                    'url': f'https://www.udemy.com/courses/search/?q={skill.replace(" ", "+")}',
                    'type': 'Video Tutorial'
                },
                {
                    'platform': 'YouTube',
                    'url': f'https://www.youtube.com/results?search_query={skill.replace(" ", "+")}+tutorial',
                    'type': 'Free Tutorial'
                },
                {
                    'platform': 'Documentation',
                    'url': f'https://www.google.com/search?q={skill.replace(" ", "+")}+official+documentation',
                    'type': 'Official Docs'
                }
            ],
            'estimated_time': '2-4 weeks',
            # Skills listed first in the target are assumed most important
            'priority': 'High' if skill in target_skills[:3] else 'Medium'
        }
        recommendations.append(courses)
    return {
        'status': 'learning_path_generated',
        'missing_skills_count': len(missing_skills),
        'categorized_skills': categorized,
        'recommendations': recommendations,
        'estimated_total_time': f'{len(missing_skills) * 3} weeks'
    }
| # ============================================================================ | |
| # 7. CONTENT QUALITY ANALYSIS | |
| # ============================================================================ | |
def analyze_text_quality(text: str) -> Dict:
    """
    Analyze text quality for resumes, cover letters, etc.

    Starts at 100 and deducts points for whitespace issues, poor readability,
    short length, informal wording, and missing action verbs. Returns the
    clamped score, a letter grade, the issue list, and the word count.
    """
    if not text or len(text.strip()) < 10:
        return {'score': 0, 'issues': ['Text too short']}
    issues = []
    score = 100
    # 1. Leading/trailing whitespace check
    if text != text.strip():
        issues.append("Remove extra whitespace")
        score -= 5
    # 2. Readability (Flesch reading ease — penalize both extremes)
    try:
        flesch = textstat.flesch_reading_ease(text)
        if flesch < 30:
            issues.append("Text is too complex. Use simpler language.")
            score -= 10
        elif flesch > 90:
            issues.append("Text may be too simple. Add more detail.")
            score -= 5
    except Exception:  # BUG FIX: was a bare except (also swallowed KeyboardInterrupt)
        pass  # best-effort: skip the readability check if textstat fails
    # 3. Length appropriateness
    word_count = len(text.split())
    if word_count < 50:
        issues.append("Add more content (aim for 100+ words)")
        score -= 15
    # 4. Professional tone
    informal_words = ['gonna', 'wanna', 'yeah', 'cool', 'awesome', 'stuff', 'things']
    found_informal = [w for w in informal_words if w in text.lower()]
    if found_informal:
        issues.append(f"Use professional language (avoid: {', '.join(found_informal)})")
        score -= 10
    # 5. Action verbs presence (for experience sections)
    action_verbs = ['developed', 'created', 'managed', 'led', 'implemented', 'designed']
    has_action_verbs = any(verb in text.lower() for verb in action_verbs)
    if not has_action_verbs and 'experience' in text.lower():
        issues.append("Use strong action verbs (developed, created, managed, etc.)")
        score -= 10
    return {
        'score': max(0, score),
        'grade': get_grade(score),
        'issues': issues if issues else ['Excellent quality!'],
        'word_count': word_count
    }
# Global predictor instance shared by the application; runs in heuristic mode
# until train() is called with historical application data.
predictor = InternshipSuccessPredictor()
logging.info("ML utilities module loaded successfully")