# FCT / services / text_module.py
# Parthnuwal7
# Adding analytical content
# 3d015cd
"""Text Embeddings Module - NLP-based Scoring"""
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import Dict, Tuple
import re
class TextModule:
    """Score free-text responses using SBERT embeddings and simple heuristics.

    Three responses are read from the input mapping under the keys
    ``text_q1`` (strengths), ``text_q2`` (career interests) and ``text_q3``
    (extracurriculars). Four features, each in ``[0, 1]``, are derived from
    them and combined with ``FEATURE_WEIGHTS`` into one overall score.
    """

    # Contribution of each feature to the overall text score (sums to 1.0).
    FEATURE_WEIGHTS = {
        'writing_quality': 0.25,
        'intent_coherence': 0.25,
        'leadership_score': 0.30,
        'content_depth': 0.20,
    }

    # Responses with fewer non-blank characters than this are not analysed
    # and receive SHORT_RESPONSE_SCORE for the feature in question.
    MIN_RESPONSE_CHARS = 50
    SHORT_RESPONSE_SCORE = 0.2

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the sentence-embedding model and pre-compute reference data.

        Args:
            model_name: Name passed to ``SentenceTransformer``. Defaults to
                the model the module was originally written against.
        """
        # Load SBERT model
        self.model = SentenceTransformer(model_name)

        # Reference embeddings for ideal responses. Only the 'career' set is
        # read by this class; 'strengths' is kept for any external users.
        self.reference_embeddings = {
            'strengths': self.model.encode([
                "I have strong technical skills in programming, problem-solving, and software development",
                "My strengths include leadership, communication, and analytical thinking",
                "I excel at teamwork, project management, and innovative solutions"
            ]),
            'career': self.model.encode([
                "I am interested in software engineering and technology innovation",
                "I want to work in data science and machine learning",
                "My goal is to become a product manager and lead technical teams"
            ])
        }

        # Leadership keywords (matched as whole words, see _assess_leadership)
        self.leadership_keywords = [
            'lead', 'leader', 'leadership', 'managed', 'organized', 'president',
            'head', 'coordinator', 'captain', 'founded', 'initiated', 'directed'
        ]

    @staticmethod
    def _clean(text) -> str:
        """Return ``text`` stripped of surrounding whitespace ('' for None/empty)."""
        return text.strip() if text else ''

    @classmethod
    def _is_too_short(cls, text) -> bool:
        """Return True when ``text`` is missing or below MIN_RESPONSE_CHARS."""
        return len(cls._clean(text)) < cls.MIN_RESPONSE_CHARS

    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
        """Calculate the text score from the three textual responses.

        Args:
            text_responses: Mapping that may contain ``text_q1``, ``text_q2``
                and ``text_q3``. Missing keys and ``None`` values are treated
                as empty responses.

        Returns:
            ``(score, confidence, features)`` where ``features`` maps each
            feature name in ``FEATURE_WEIGHTS`` to its value.
        """
        # `.get(key, '')` alone would let an explicit None through and crash
        # later on `.split()`, so every response is normalised up front.
        text_q1, text_q2, text_q3 = (
            self._clean(text_responses.get(key))
            for key in ('text_q1', 'text_q2', 'text_q3')
        )

        features = {
            # Feature 1: Writing quality (text_q1 - strengths)
            'writing_quality': self._assess_writing_quality(text_q1),
            # Feature 2: Intent coherence (text_q2 - career interests)
            'intent_coherence': self._assess_intent_coherence(text_q2),
            # Feature 3: Leadership flag (text_q3 - extracurriculars)
            'leadership_score': self._assess_leadership(text_q3),
            # Feature 4: Content depth (all responses)
            'content_depth': self._assess_content_depth(text_q1, text_q2, text_q3),
        }

        # Weighted sum of the four features
        text_score = float(sum(
            features[name] * weight
            for name, weight in self.FEATURE_WEIGHTS.items()
        ))

        # Confidence reflects how complete the responses are
        confidence = self._calculate_confidence(text_q1, text_q2, text_q3)
        return text_score, confidence, features

    def _assess_writing_quality(self, text: str) -> float:
        """Assess writing quality using length, structure and variety heuristics.

        Returns SHORT_RESPONSE_SCORE for missing/short text, otherwise a
        value between 0.6 and 1.0.
        """
        if self._is_too_short(text):
            return self.SHORT_RESPONSE_SCORE
        text = text.strip()

        score = 0.5  # Base score

        # Length check (150-300 words ideal)
        words = text.split()
        word_count = len(words)
        if 150 <= word_count <= 300:
            score += 0.3
        elif 100 <= word_count < 150 or 300 < word_count <= 400:
            score += 0.2
        else:
            score += 0.1

        # Sentence structure (multiple sentences). re.split leaves an empty
        # trailing piece when the text ends in punctuation; drop blank pieces
        # so that four sentences are not counted as five.
        sentences = [part for part in re.split(r'[.!?]+', text) if part.strip()]
        if len(sentences) >= 5:
            score += 0.1

        # Proper capitalization
        if text[0].isupper():
            score += 0.05

        # No excessive repetition
        lowered = [word.lower() for word in words]
        unique_ratio = len(set(lowered)) / len(lowered) if lowered else 0
        if unique_ratio > 0.6:
            score += 0.05

        return min(score, 1.0)

    def _assess_intent_coherence(self, text: str) -> float:
        """Assess career intent coherence using embeddings.

        The response embedding is compared by cosine similarity with each
        'career' reference embedding; the best similarity is mapped from
        ``[-1, 1]`` to ``[0, 1]``. Returns SHORT_RESPONSE_SCORE for
        missing/short text.
        """
        if self._is_too_short(text):
            return self.SHORT_RESPONSE_SCORE

        # Encode the response
        response = np.asarray(self.model.encode([text])[0], dtype=float)
        references = np.asarray(self.reference_embeddings['career'], dtype=float)

        if references.size == 0:
            # No references to compare with: neutral similarity.
            best = 0.0
        else:
            references = np.atleast_2d(references)
            dots = references @ response
            norms = np.linalg.norm(references, axis=1) * np.linalg.norm(response)
            # A zero-norm vector has no direction; treat its similarity as 0
            # instead of dividing by zero and propagating NaN.
            similarities = np.divide(
                dots, norms, out=np.zeros_like(dots), where=norms > 0
            )
            # Clip guards against rounding pushing the value past +/-1.
            best = float(np.clip(similarities.max(), -1.0, 1.0))

        # Normalize to 0-1 (cosine similarity is -1 to 1). A plain float is
        # returned so the feature dict stays JSON-serialisable.
        return (best + 1.0) / 2.0

    def _assess_leadership(self, text: str) -> float:
        """Assess leadership based on distinct leadership keywords.

        Keywords are matched as whole words, optionally followed by a simple
        inflection (``s``, ``ed``, ``ing``). This keeps "leading" counting as
        ``lead`` while preventing "leadership" from being counted three times
        and "ahead" from matching ``head``.
        """
        if self._is_too_short(text):
            return self.SHORT_RESPONSE_SCORE

        text_lower = text.lower()

        # Count distinct leadership keywords present in the text
        alternatives = [
            re.escape(keyword.lower())
            for keyword in self.leadership_keywords
            if keyword
        ]
        if alternatives:
            pattern = re.compile(
                r'\b(' + '|'.join(alternatives) + r')(?:s|ed|ing)?\b'
            )
            keyword_count = len(
                {match.group(1) for match in pattern.finditer(text_lower)}
            )
        else:
            keyword_count = 0

        # Base score on keyword presence
        if keyword_count >= 3:
            score = 1.0
        elif keyword_count == 2:
            score = 0.8
        elif keyword_count == 1:
            score = 0.6
        else:
            score = 0.3

        # Bonus for specific leadership phrases
        if 'led a team' in text_lower or 'team lead' in text_lower:
            score = min(score + 0.1, 1.0)

        return score

    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Assess overall content depth from the combined word count."""
        total_words = sum(
            len((text or '').split()) for text in (text_q1, text_q2, text_q3)
        )

        if total_words >= 450:  # 150+ words each
            return 1.0
        if total_words >= 300:
            return 0.8
        if total_words >= 200:
            return 0.6
        if total_words >= 100:
            return 0.4
        return 0.2

    def _calculate_confidence(self, text_q1: str, text_q2: str, text_q3: str) -> float:
        """Calculate confidence as the mean completeness of the three responses.

        Each response contributes 0 (empty), 0.3 (< 50 chars), 0.6
        (< 100 chars) or 1.0, measured after stripping whitespace.
        """
        scores = []
        for text in (text_q1, text_q2, text_q3):
            length = len(self._clean(text))
            if length == 0:
                scores.append(0.0)
            elif length < 50:
                scores.append(0.3)
            elif length < 100:
                scores.append(0.6)
            else:
                scores.append(1.0)
        # np.mean returns a NumPy scalar; hand callers a plain float.
        return float(np.mean(scores))

    def explain(self, features: Dict) -> Dict:
        """Generate a human-readable explanation for the text scores.

        Args:
            features: Feature mapping as returned by ``score``. Missing
                features are treated as 0.

        Returns:
            Dict with ``'highlights'`` (features above 0.7) and
            ``'suggestions'`` (features below 0.5), each a list of messages.
        """
        highlight_rules = (
            ('writing_quality', "Strong writing quality with clear communication"),
            ('leadership_score', "Demonstrated leadership experience and initiative"),
            ('intent_coherence', "Clear and coherent career goals"),
        )
        suggestion_rules = (
            ('writing_quality', "Provide more detailed responses (aim for 150-300 words each)"),
            ('leadership_score', "Highlight specific leadership roles and their impact"),
            ('content_depth', "Include more specific examples and achievements"),
        )

        return {
            'highlights': [
                message for name, message in highlight_rules
                if features.get(name, 0) > 0.7
            ],
            'suggestions': [
                message for name, message in suggestion_rules
                if features.get(name, 0) < 0.5
            ],
        }