"""Text Embeddings Module - NLP-based Scoring"""
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import Dict, Tuple
import re
class TextModule:
    """Scores text responses using SBERT embeddings and heuristics.

    Combines a sentence-embedding similarity signal (career-intent coherence)
    with rule-based heuristics (writing quality, leadership keywords, content
    depth) to produce a 0-1 text score, a 0-1 confidence, and a feature dict.
    """

    def __init__(self):
        # Load SBERT model (downloaded/cached on first use — requires network
        # the first time).
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

        # Reference embeddings for ideal responses; compared against applicant
        # text via cosine similarity in _assess_intent_coherence.
        self.reference_embeddings = {
            'strengths': self.model.encode([
                "I have strong technical skills in programming, problem-solving, and software development",
                "My strengths include leadership, communication, and analytical thinking",
                "I excel at teamwork, project management, and innovative solutions"
            ]),
            'career': self.model.encode([
                "I am interested in software engineering and technology innovation",
                "I want to work in data science and machine learning",
                "My goal is to become a product manager and lead technical teams"
            ])
        }

        # Leadership keywords, matched as whole words (see _assess_leadership).
        self.leadership_keywords = [
            'lead', 'leader', 'leadership', 'managed', 'organized', 'president',
            'head', 'coordinator', 'captain', 'founded', 'initiated', 'directed'
        ]

    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
        """
        Calculate text score from 3 textual responses.

        Args:
            text_responses: dict with keys 'text_q1' (strengths),
                'text_q2' (career interests), 'text_q3' (extracurriculars).
                Missing keys are treated as empty responses.

        Returns: (score, confidence, features)
            score: weighted 0-1 aggregate of the four features below.
            confidence: 0-1, based on response completeness.
            features: per-feature sub-scores.
        """
        features = {}

        text_q1 = text_responses.get('text_q1', '')
        text_q2 = text_responses.get('text_q2', '')
        text_q3 = text_responses.get('text_q3', '')

        # Feature 1: Writing quality (text_q1 - strengths)
        features['writing_quality'] = self._assess_writing_quality(text_q1)

        # Feature 2: Intent coherence (text_q2 - career interests)
        features['intent_coherence'] = self._assess_intent_coherence(text_q2)

        # Feature 3: Leadership flag (text_q3 - extracurriculars)
        features['leadership_score'] = self._assess_leadership(text_q3)

        # Feature 4: Content depth (all responses)
        features['content_depth'] = self._assess_content_depth(text_q1, text_q2, text_q3)

        # Fixed weights; leadership is weighted heaviest by design.
        text_score = (
            features['writing_quality'] * 0.25 +
            features['intent_coherence'] * 0.25 +
            features['leadership_score'] * 0.30 +
            features['content_depth'] * 0.20
        )

        # Confidence reflects completeness, not correctness, of the answers.
        confidence = self._calculate_confidence(text_q1, text_q2, text_q3)

        return text_score, confidence, features

    def _assess_writing_quality(self, text: str) -> float:
        """Assess writing quality using heuristics (length, structure, variety)."""
        if not text or len(text) < 50:
            return 0.2  # too short to judge

        score = 0.5  # Base score

        # Length check (150-300 words ideal)
        word_count = len(text.split())
        if 150 <= word_count <= 300:
            score += 0.3
        elif 100 <= word_count < 150 or 300 < word_count <= 400:
            score += 0.2
        else:
            score += 0.1

        # Sentence structure: count non-empty sentences only. re.split leaves
        # an empty trailing fragment after final punctuation, which previously
        # inflated the count by one.
        sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
        if len(sentences) >= 5:
            score += 0.1

        # Proper capitalization (safe: text is non-empty here)
        if text[0].isupper():
            score += 0.05

        # No excessive repetition (vocabulary diversity)
        words = text.lower().split()
        unique_ratio = len(set(words)) / len(words) if words else 0
        if unique_ratio > 0.6:
            score += 0.05

        return min(score, 1.0)

    def _assess_intent_coherence(self, text: str) -> float:
        """Assess career intent coherence via cosine similarity to references."""
        if not text or len(text) < 50:
            return 0.2

        # Encode the response
        response_embedding = self.model.encode([text])[0]

        # Cosine similarity against each reference career embedding.
        similarities = []
        response_norm = np.linalg.norm(response_embedding)
        for ref_emb in self.reference_embeddings['career']:
            denom = response_norm * np.linalg.norm(ref_emb)
            # Guard against a zero-norm embedding (degenerate input).
            similarity = float(np.dot(response_embedding, ref_emb) / denom) if denom else 0.0
            similarities.append(similarity)

        # Take max similarity: best match among reference intents.
        max_similarity = max(similarities) if similarities else 0

        # Normalize to 0-1 (cosine similarity is -1 to 1)
        score = (max_similarity + 1) / 2

        return score

    def _assess_leadership(self, text: str) -> float:
        """Assess leadership based on whole-word keyword matches."""
        if not text or len(text) < 50:
            return 0.2

        text_lower = text.lower()

        # Match keywords as whole words. The previous substring test let a
        # single word like "leadership" hit 'lead', 'leader' AND 'leadership'
        # at once (and "misleading"/"overhead" produced false positives).
        tokens = set(re.findall(r"[a-z]+", text_lower))
        keyword_count = sum(1 for keyword in self.leadership_keywords if keyword in tokens)

        # Base score on keyword presence
        if keyword_count >= 3:
            score = 1.0
        elif keyword_count == 2:
            score = 0.8
        elif keyword_count == 1:
            score = 0.6
        else:
            score = 0.3

        # Bonus for specific leadership phrases (phrase match, so substring is
        # correct here).
        if 'led a team' in text_lower or 'team lead' in text_lower:
            score = min(score + 0.1, 1.0)

        return score

    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Assess overall content depth from combined word count."""
        total_words = len(text_q1.split()) + len(text_q2.split()) + len(text_q3.split())

        if total_words >= 450:  # 150+ words each
            return 1.0
        elif total_words >= 300:
            return 0.8
        elif total_words >= 200:
            return 0.6
        elif total_words >= 100:
            return 0.4
        else:
            return 0.2

    def _calculate_confidence(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Calculate confidence based on per-response completeness."""
        scores = []
        for text in [text_q1, text_q2, text_q3]:
            if not text:
                scores.append(0)
            elif len(text) < 50:
                scores.append(0.3)
            elif len(text) < 100:
                scores.append(0.6)
            else:
                scores.append(1.0)

        # Cast so callers get a plain float, not a numpy scalar.
        return float(np.mean(scores))

    def explain(self, features: Dict) -> Dict:
        """Generate human-readable highlights/suggestions from feature scores."""
        explanations = {
            'highlights': [],
            'suggestions': []
        }

        # Highlights
        if features.get('writing_quality', 0) > 0.7:
            explanations['highlights'].append("Strong writing quality with clear communication")
        if features.get('leadership_score', 0) > 0.7:
            explanations['highlights'].append("Demonstrated leadership experience and initiative")
        if features.get('intent_coherence', 0) > 0.7:
            explanations['highlights'].append("Clear and coherent career goals")

        # Suggestions
        if features.get('writing_quality', 0) < 0.5:
            explanations['suggestions'].append("Provide more detailed responses (aim for 150-300 words each)")
        if features.get('leadership_score', 0) < 0.5:
            explanations['suggestions'].append("Highlight specific leadership roles and their impact")
        if features.get('content_depth', 0) < 0.5:
            explanations['suggestions'].append("Include more specific examples and achievements")

        return explanations