Spaces:

parthnuwal7
/

FCT

Sleeping

FCT / services /text_module.py

Parthnuwal7

Adding analytical content

3d015cd 3 months ago

7.81 kB

	"""Text Embeddings Module - NLP-based Scoring"""
	import numpy as np
	from sentence_transformers import SentenceTransformer
	from typing import Dict, Tuple
	import re

	class TextModule:
	    """Score free-text answers using SBERT embeddings and simple heuristics.

	    ``score`` reads three answers from a dict (keys ``text_q1``, ``text_q2``,
	    ``text_q3``) and blends four features, each in the range [0, 1]:

	    * ``writing_quality``  - heuristics on ``text_q1``
	    * ``intent_coherence`` - embedding similarity of ``text_q2`` to reference
	      career statements
	    * ``leadership_score`` - leadership keyword hits in ``text_q3``
	    * ``content_depth``    - total word count across all three answers
	    """

	    # Answers shorter than this many characters receive the floor score.
	    _MIN_CHARS = 50
	    _FLOOR_SCORE = 0.2

	    def __init__(self):
	        """Load the SBERT model and pre-compute the reference embeddings."""
	        # Load SBERT model
	        self.model = SentenceTransformer('all-MiniLM-L6-v2')

	        # Reference embeddings for ideal responses
	        self.reference_embeddings = {
	            'strengths': self.model.encode([
	                "I have strong technical skills in programming, problem-solving, and software development",
	                "My strengths include leadership, communication, and analytical thinking",
	                "I excel at teamwork, project management, and innovative solutions"
	            ]),
	            'career': self.model.encode([
	                "I am interested in software engineering and technology innovation",
	                "I want to work in data science and machine learning",
	                "My goal is to become a product manager and lead technical teams"
	            ])
	        }

	        # Leadership keywords (lowercase; matched at the start of a word)
	        self.leadership_keywords = [
	            'lead', 'leader', 'leadership', 'managed', 'organized', 'president',
	            'head', 'coordinator', 'captain', 'founded', 'initiated', 'directed'
	        ]

	    def score(self, text_responses: Dict[str, str]) -> Tuple[float, float, Dict]:
	        """Calculate the text score from the three textual responses.

	        Args:
	            text_responses: mapping with optional keys ``text_q1`` (strengths),
	                ``text_q2`` (career interests) and ``text_q3``
	                (extracurriculars). Missing or ``None`` values are treated
	                as empty answers.

	        Returns:
	            ``(score, confidence, features)`` where ``features`` holds the
	            four per-feature scores that were blended into ``score``.
	        """
	        # ``or ''`` also covers keys that are present but explicitly None,
	        # which ``.get(key, '')`` alone would pass through.
	        text_q1 = text_responses.get('text_q1') or ''
	        text_q2 = text_responses.get('text_q2') or ''
	        text_q3 = text_responses.get('text_q3') or ''

	        features = {
	            # Feature 1: Writing quality (text_q1 - strengths)
	            'writing_quality': self._assess_writing_quality(text_q1),
	            # Feature 2: Intent coherence (text_q2 - career interests)
	            'intent_coherence': self._assess_intent_coherence(text_q2),
	            # Feature 3: Leadership flag (text_q3 - extracurriculars)
	            'leadership_score': self._assess_leadership(text_q3),
	            # Feature 4: Content depth (all responses)
	            'content_depth': self._assess_content_depth(text_q1, text_q2, text_q3),
	        }

	        # Weighted blend; the weights sum to 1.0.
	        text_score = (
	            features['writing_quality'] * 0.25 +
	            features['intent_coherence'] * 0.25 +
	            features['leadership_score'] * 0.30 +
	            features['content_depth'] * 0.20
	        )

	        # Confidence reflects how complete the responses are.
	        confidence = self._calculate_confidence(text_q1, text_q2, text_q3)

	        return text_score, confidence, features

	    def _assess_writing_quality(self, text: str) -> float:
	        """Assess writing quality using length, structure and variety heuristics.

	        Returns 0.2 for answers under 50 characters, otherwise a base of 0.5
	        plus bonuses, capped at 1.0.
	        """
	        if not text or len(text) < self._MIN_CHARS:
	            return self._FLOOR_SCORE

	        score = 0.5  # Base score

	        # Length check (150-300 words ideal)
	        word_count = len(text.split())
	        if 150 <= word_count <= 300:
	            score += 0.3
	        elif 100 <= word_count < 150 or 300 < word_count <= 400:
	            score += 0.2
	        else:
	            score += 0.1

	        # Sentence structure: re.split leaves an empty trailing piece when the
	        # text ends with punctuation, so drop blank fragments before counting.
	        sentences = [part for part in re.split(r'[.!?]+', text) if part.strip()]
	        if len(sentences) >= 5:
	            score += 0.1

	        # Proper capitalization of the first non-space character
	        if text.lstrip()[:1].isupper():
	            score += 0.05

	        # No excessive repetition
	        words = text.lower().split()
	        unique_ratio = len(set(words)) / len(words) if words else 0
	        if unique_ratio > 0.6:
	            score += 0.05

	        return min(score, 1.0)

	    def _assess_intent_coherence(self, text: str) -> float:
	        """Assess career intent coherence using embeddings.

	        Encodes ``text`` with the SBERT model, takes the highest cosine
	        similarity against the reference career embeddings and maps it from
	        [-1, 1] to [0, 1]. Answers under 50 characters return 0.2.
	        """
	        if not text or len(text) < self._MIN_CHARS:
	            return self._FLOOR_SCORE

	        # Encode the response
	        response_embedding = self.model.encode([text])[0]
	        response_norm = np.linalg.norm(response_embedding)

	        # Cosine similarity with each reference career embedding
	        similarities = []
	        for ref_emb in self.reference_embeddings['career']:
	            denominator = response_norm * np.linalg.norm(ref_emb)
	            if denominator == 0:
	                # A zero vector has no direction; skip rather than emit NaN.
	                continue
	            similarities.append(np.dot(response_embedding, ref_emb) / denominator)

	        # Take max similarity
	        max_similarity = max(similarities) if similarities else 0

	        # Normalize to 0-1 (cosine similarity is -1 to 1). Clamp away float
	        # round-off and return a built-in float so the value stays
	        # JSON-serializable (the encoder yields NumPy scalars).
	        score = (max_similarity + 1) / 2
	        return float(min(max(score, 0.0), 1.0))

	    def _count_leadership_keywords(self, text_lower: str) -> int:
	        """Count distinct leadership keywords that begin a word of the text.

	        Each word is credited only with the longest keyword it starts with,
	        so "leadership" counts once (not as lead + leader + leadership) and
	        words that merely contain a keyword ("ahead", "misleading") do not
	        count at all.
	        """
	        longest_first = sorted(self.leadership_keywords, key=len, reverse=True)
	        matched = set()
	        for word in re.findall(r'\w+', text_lower):
	            for keyword in longest_first:
	                if word.startswith(keyword):
	                    matched.add(keyword)
	                    break
	        return len(matched)

	    def _assess_leadership(self, text: str) -> float:
	        """Assess leadership based on keywords.

	        Returns 0.2 for answers under 50 characters; otherwise 0.3 / 0.6 /
	        0.8 / 1.0 for 0 / 1 / 2 / 3+ distinct keywords, plus 0.1 (capped at
	        1.0) when an explicit team-lead phrase is present.
	        """
	        if not text or len(text) < self._MIN_CHARS:
	            return self._FLOOR_SCORE

	        text_lower = text.lower()

	        # Count leadership keywords
	        keyword_count = self._count_leadership_keywords(text_lower)

	        # Base score on keyword presence
	        if keyword_count >= 3:
	            score = 1.0
	        elif keyword_count == 2:
	            score = 0.8
	        elif keyword_count == 1:
	            score = 0.6
	        else:
	            score = 0.3

	        # Bonus for specific leadership phrases
	        if 'led a team' in text_lower or 'team lead' in text_lower:
	            score = min(score + 0.1, 1.0)

	        return score

	    def _assess_content_depth(self, text_q1: str, text_q2: str, text_q3: str) -> float:
	        """Assess overall content depth from the combined word count."""
	        total_words = len(text_q1.split()) + len(text_q2.split()) + len(text_q3.split())

	        if total_words >= 450:  # 150+ words each
	            return 1.0
	        elif total_words >= 300:
	            return 0.8
	        elif total_words >= 200:
	            return 0.6
	        elif total_words >= 100:
	            return 0.4
	        else:
	            return 0.2

	    def _calculate_confidence(self, text_q1: str, text_q2: str, text_q3: str) -> float:
	        """Calculate confidence based on completeness.

	        Each answer contributes 0 (empty), 0.3 (< 50 chars), 0.6 (< 100
	        chars) or 1.0; the result is their mean as a built-in float.
	        """
	        scores = []

	        for text in [text_q1, text_q2, text_q3]:
	            if not text:
	                scores.append(0)
	            elif len(text) < 50:
	                scores.append(0.3)
	            elif len(text) < 100:
	                scores.append(0.6)
	            else:
	                scores.append(1.0)

	        # np.mean returns np.float64; convert to match the annotation.
	        return float(np.mean(scores))

	    def explain(self, features: Dict) -> Dict:
	        """Generate explanation for text scores.

	        Args:
	            features: per-feature scores as returned by ``score``; missing
	                keys are treated as 0.

	        Returns:
	            Dict with ``highlights`` (features above 0.7) and
	            ``suggestions`` (features below 0.5), each a list of strings.
	        """
	        explanations = {
	            'highlights': [],
	            'suggestions': []
	        }

	        # Highlights
	        if features.get('writing_quality', 0) > 0.7:
	            explanations['highlights'].append("Strong writing quality with clear communication")

	        if features.get('leadership_score', 0) > 0.7:
	            explanations['highlights'].append("Demonstrated leadership experience and initiative")

	        if features.get('intent_coherence', 0) > 0.7:
	            explanations['highlights'].append("Clear and coherent career goals")

	        # Suggestions
	        if features.get('writing_quality', 0) < 0.5:
	            explanations['suggestions'].append("Provide more detailed responses (aim for 150-300 words each)")

	        if features.get('leadership_score', 0) < 0.5:
	            explanations['suggestions'].append("Highlight specific leadership roles and their impact")

	        if features.get('content_depth', 0) < 0.5:
	            explanations['suggestions'].append("Include more specific examples and achievements")

	        return explanations