Spaces:

divAIne
/

busy-module-audio

Sleeping

App Files Files Community

busy-module-audio / text_features.py

EurekaPotato

Upload folder using huggingface_hub

3469c65 verified about 1 month ago

raw

history blame contribute delete

16.3 kB

	"""
	Text Feature Extractor - IMPROVED VERSION
	Extracts 9 text features from conversation transcripts to detect busy/distracted states.

	KEY IMPROVEMENTS:
	1. Uses NLI model for intent classification (understands "not busy" properly)
	2. Handles negation, context, and sarcasm
	3. Neutralizes t9_latency for single-side audio (always 0.0; the key is kept for compatibility)
	"""

	import numpy as np
	from typing import List, Dict, Tuple
	from transformers import pipeline
	from sentence_transformers import SentenceTransformer
	import re


	class TextFeatureExtractor:
	    """Extract text features (t0-t9) used to detect busy/distracted speakers.

	    Explicit busy/free intent (t0, t1) comes from a zero-shot NLI classifier
	    when one is available, otherwise from regex patterns with negation
	    handling. The remaining features are response-length statistics, marker
	    ratios, sentiment polarity, question/response coherence, and a latency
	    placeholder that is always 0.0.
	    """

	    # Candidate labels for the zero-shot classifier. The "busy" / "free"
	    # substrings are what the label-matching code keys on.
	    _INTENT_LABELS = (
	        "person is busy or occupied",
	        "person is free and available",
	        "unclear or neutral",
	    )
	    # Minimum top-label score for an intent to count (avoids false positives).
	    _INTENT_THRESHOLD = 0.6

	    def __init__(self, use_intent_model: bool = True):
	        """
	        Initialize NLP models.

	        Args:
	            use_intent_model: If True, use BART-MNLI for intent classification.
	                If False (or if loading fails), fall back to pattern matching.
	        """
	        self.use_intent_model = use_intent_model
	        # Defined up front so the attribute exists even when the model is
	        # disabled or fails to load.
	        self.intent_classifier = None
	        # Single-entry cache: (transcript, intent) of the last classification.
	        self._intent_cache = None

	        print("Loading NLP models...")

	        # Sentiment model
	        model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
	        self.sentiment_model = pipeline(
	            "sentiment-analysis",
	            model=model_name,
	            device=-1
	        )
	        print("[OK] Sentiment model loaded")

	        # Coherence model
	        self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
	        print("[OK] Coherence model loaded")

	        # Always setup patterns — busy_keywords is needed by extract_marker_counts()
	        self._setup_patterns()

	        # Intent classification model (context-aware busy/free detection)
	        if self.use_intent_model:
	            try:
	                self.intent_classifier = pipeline(
	                    "zero-shot-classification",
	                    model="facebook/bart-large-mnli",
	                    device=-1
	                )
	                print("[OK] Intent classifier loaded (BART-MNLI)")
	            except Exception as e:
	                print(f"[WARN] Intent classifier failed to load: {e}")
	                print(" Falling back to pattern matching")
	                self.use_intent_model = False

	    def _setup_patterns(self):
	        """Compile the fallback regexes and define the marker keyword lists."""
	        # Negator, then at most one intervening word, then the target word
	        # (captured as group 1). `\w*n't` covers every contraction
	        # ("isn't", "can't", "won't", ...), which a leading `\bn't` cannot.
	        self.negation_pattern = re.compile(
	            r"\b(?:not|no|never|neither|cannot|dont|wont|cant|\w*n't)"
	            r"\s+(?:\w+\s+)?(busy|free|available|talk|rush)",
	            re.IGNORECASE
	        )

	        # Busy patterns (positive assertions)
	        busy_sources = [
	            r"\b(i'm|i am|im)\s+(busy|driving|working|cooking|rushing)\b",
	            r"\bin a (meeting|call|hurry)\b",
	            r"\bcan't talk\b",
	            r"\bcall (you|me) back\b",
	            r"(?:\bnot|n't) a good time\b",
	            r"\bbad time\b",
	            r"(?:n't|\bdont|\bnot|\bnever) have (?:the |any )?time\b",
	            r"\bno time\b"
	        ]

	        # Free patterns (positive assertions). The look-behinds stop
	        # "don't have time" / "not a good time" from reading as free.
	        free_sources = [
	            r"\b(i'm|i am|im)\s+(free|available)\b",
	            r"\bcan talk\b",
	            r"(?<!n't )(?<!dont )(?<!not )(?<!never )\bhave time\b",
	            r"\bnot busy\b",
	            r"(?<!not a )(?<!n't a )\bgood time\b",
	            r"\bnow works\b"
	        ]

	        # Compile patterns
	        self.busy_patterns = [re.compile(p, re.IGNORECASE) for p in busy_sources]
	        self.free_patterns = [re.compile(p, re.IGNORECASE) for p in free_sources]

	        # Legacy keywords for other features
	        self.busy_keywords = {
	            'cognitive_load': [
	                'um', 'uh', 'like', 'you know', 'i mean', 'kind of',
	                'sort of', 'basically', 'actually'
	            ],
	            'time_pressure': [
	                'quickly', 'hurry', 'fast', 'urgent', 'asap', 'right now',
	                'immediately', 'short', 'brief'
	            ],
	            'deflection': [
	                'later', 'another time', 'not now', 'maybe', 'i don\'t know',
	                'whatever', 'sure sure', 'yeah yeah'
	            ]
	        }

	    @staticmethod
	    def _normalize(transcript: str) -> str:
	        """Lower-case the text and map curly apostrophes to ASCII ones."""
	        return transcript.lower().replace("\u2019", "'")

	    def _classify_intent(self, transcript: str) -> str:
	        """Run the zero-shot classifier once and reduce it to an intent tag.

	        Returns:
	            "busy", "free" or "neutral" when the model produced a result
	            (neutral also covers low-confidence results), or "" when the
	            model is disabled or the call failed, in which case the caller
	            should fall back to pattern matching.
	        """
	        if not self.use_intent_model:
	            return ""

	        # extract_all() asks for both t0 and t1 on the same text; reuse the
	        # previous answer instead of running the classifier twice.
	        cached = getattr(self, "_intent_cache", None)
	        if cached is not None and cached[0] == transcript:
	            return cached[1]

	        try:
	            result = self.intent_classifier(
	                transcript,
	                candidate_labels=list(self._INTENT_LABELS),
	                hypothesis_template="This {}."
	            )
	            top_label = result['labels'][0]
	            top_score = result['scores'][0]
	        except Exception as e:
	            print(f"Intent classification failed: {e}")
	            return ""

	        intent = "neutral"
	        if top_score > self._INTENT_THRESHOLD:
	            if "busy" in top_label:
	                intent = "busy"
	            elif "free" in top_label:
	                intent = "free"

	        self._intent_cache = (transcript, intent)
	        return intent

	    def _negation_verdict(self, text: str) -> str:
	        """Interpret a negated busy/free statement in normalized *text*.

	        Returns "free" for negated busy/rush ("not busy"), "busy" for
	        negated free/available/talk ("can't talk"), "" when nothing matches.
	        """
	        match = self.negation_pattern.search(text)
	        if match is None:
	            return ""
	        target = match.group(1).lower()
	        return "free" if target in ("busy", "rush") else "busy"

	    def extract_explicit_busy(self, transcript: str) -> float:
	        """
	        T1: Explicit Busy Indicators (binary: 0 or 1)

	        Uses the NLI model when available, otherwise regex patterns:
	        - "I'm busy" → 1.0
	        - "I'm not busy" → 0.0
	        - "Can't talk right now" → 1.0
	        - "I can talk" → 0.0

	        Transcripts shorter than 3 non-blank characters return 0.0.
	        """
	        if not transcript or len(transcript.strip()) < 3:
	            return 0.0

	        # Method 1: intent classification model (best)
	        intent = self._classify_intent(transcript)
	        if intent:
	            return 1.0 if intent == "busy" else 0.0

	        # Method 2: pattern-based with negation handling (fallback)
	        return self._extract_busy_patterns(transcript)

	    def _extract_busy_patterns(self, transcript: str) -> float:
	        """Pattern-based busy detection with negation handling.

	        Order of precedence: negated statement, then free patterns, then
	        busy patterns. Returns 1.0 for busy, 0.0 otherwise.
	        """
	        text = self._normalize(transcript)

	        # Negated busy/free statements decide immediately.
	        verdict = self._negation_verdict(text)
	        if verdict:
	            return 1.0 if verdict == "busy" else 0.0

	        # Check free patterns first (higher priority)
	        if any(pattern.search(text) for pattern in self.free_patterns):
	            return 0.0

	        # Then check busy patterns
	        if any(pattern.search(text) for pattern in self.busy_patterns):
	            return 1.0

	        return 0.0

	    def extract_explicit_free(self, transcript: str) -> float:
	        """
	        T0: Explicit Free Indicators (binary: 0 or 1)

	        Uses the NLI model when available. The fallback applies the same
	        negation handling as busy detection before the free patterns, so
	        "can't talk" is never reported as free.
	        """
	        if not transcript or len(transcript.strip()) < 3:
	            return 0.0

	        # Use intent model
	        intent = self._classify_intent(transcript)
	        if intent:
	            return 1.0 if intent == "free" else 0.0

	        # Fallback to patterns
	        text = self._normalize(transcript)

	        verdict = self._negation_verdict(text)
	        if verdict:
	            return 1.0 if verdict == "free" else 0.0

	        if any(pattern.search(text) for pattern in self.free_patterns):
	            return 1.0

	        return 0.0

	    def extract_response_patterns(self, transcript_list: List[str]) -> Tuple[float, float]:
	        """
	        T2-T3: Average Response Length and Short Response Ratio

	        Returns:
	            - avg_response_len: Average words per response
	            - short_ratio: Fraction of responses with ≤3 words

	        An empty list yields (0.0, 0.0).
	        """
	        if not transcript_list:
	            return 0.0, 0.0

	        word_counts = [len(response.split()) for response in transcript_list]

	        avg_response_len = np.mean(word_counts)
	        short_count = sum(1 for wc in word_counts if wc <= 3)
	        short_ratio = short_count / len(word_counts)

	        return float(avg_response_len), float(short_ratio)

	    @staticmethod
	    def _marker_hits(text: str, keywords: List[str]) -> int:
	        """Count how many of *keywords* occur in *text* as whole words/phrases."""
	        hits = 0
	        for keyword in keywords:
	            parts = keyword.split()
	            if not parts:
	                continue
	            # Whole-word match so "um" does not fire on "number"; any run of
	            # whitespace may separate the words of a multi-word phrase.
	            phrase = r"\s+".join(re.escape(part) for part in parts)
	            if re.search(r"(?<!\w)" + phrase + r"(?!\w)", text):
	                hits += 1
	        return hits

	    def extract_marker_counts(self, transcript: str) -> Tuple[float, float, float]:
	        """
	        T4-T6: Cognitive Load, Time Pressure, Deflection markers

	        Each value is the number of distinct markers of that category that
	        appear in the transcript (as whole words/phrases), divided by the
	        total word count.

	        Returns:
	            - cognitive_load: filler markers present / total words
	            - time_pressure: urgency markers present / total words
	            - deflection: deflection phrases present / total words

	        An empty or missing transcript yields (0.0, 0.0, 0.0).
	        """
	        if not transcript:
	            return 0.0, 0.0, 0.0

	        total_words = len(transcript.split())
	        if total_words == 0:
	            return 0.0, 0.0, 0.0

	        text = self._normalize(transcript)

	        # Normalize each category's hit count by total words
	        cognitive_load = self._marker_hits(text, self.busy_keywords['cognitive_load']) / total_words
	        time_pressure = self._marker_hits(text, self.busy_keywords['time_pressure']) / total_words
	        deflection = self._marker_hits(text, self.busy_keywords['deflection']) / total_words

	        return float(cognitive_load), float(time_pressure), float(deflection)

	    def extract_sentiment(self, transcript: str) -> float:
	        """
	        T7: Sentiment Polarity (-1 to +1)

	        Returns +score for a positive label, -score for a negative label and
	        0.0 for anything else, for empty input, or when the model call fails.
	        Only the first 512 characters of the transcript are scored.
	        """
	        if not transcript or not transcript.strip():
	            return 0.0

	        try:
	            result = self.sentiment_model(transcript[:512])[0]
	            label = result['label'].lower()
	            score = result['score']

	            if 'positive' in label:
	                return float(score)
	            elif 'negative' in label:
	                return float(-score)
	            else:
	                return 0.0

	        except Exception as e:
	            print(f"Sentiment extraction error: {e}")
	            return 0.0

	    def extract_coherence(self, question: str, responses: List[str]) -> float:
	        """
	        T8: Coherence Score (0 to 1)

	        Mean cosine similarity between the question embedding and each
	        response embedding, clamped to [0, 1]. Returns the neutral value 0.5
	        when the question or the responses are missing, or on model failure.
	        """
	        if not question or not responses:
	            return 0.5  # Neutral if no data

	        try:
	            # Encode question and responses
	            question_embedding = self.coherence_model.encode(question, convert_to_tensor=True)
	            response_embeddings = self.coherence_model.encode(responses, convert_to_tensor=True)

	            # Calculate cosine similarity
	            from sentence_transformers import util
	            similarities = util.cos_sim(question_embedding, response_embeddings)[0]

	            # Average similarity as coherence score
	            coherence = float(np.mean(similarities.cpu().numpy()))

	            return max(0.0, min(1.0, coherence))  # Clamp to [0, 1]
	        except Exception as e:
	            print(f"Coherence extraction error: {e}")
	            return 0.5

	    def extract_latency(self, events: List[Dict]) -> float:
	        """
	        T9: Average Response Latency (seconds)

	        Placeholder for single-side audio: there are no agent questions to
	        measure against, so this always returns 0.0. Kept for compatibility
	        with existing models.

	        Args:
	            events: List of dicts with 'timestamp' and 'speaker' keys (unused).
	        """
	        # Always return 0 for single-side audio
	        return 0.0

	    def extract_all(
	        self,
	        transcript_list: List[str],
	        full_transcript: str = "",
	        question: str = "",
	        events: List[Dict] = None
	    ) -> Dict[str, float]:
	        """
	        Extract all text features.

	        Args:
	            transcript_list: List of individual responses (can be single item for one-turn)
	            full_transcript: Complete conversation text; when empty, the
	                responses joined with spaces are used instead
	            question: The question/prompt from agent (for coherence)
	            events: List of timestamped events (unused for single-side audio)

	        Returns:
	            Dict with keys: t0_explicit_free, t1_explicit_busy,
	            t2_avg_resp_len, t3_short_ratio,
	            t4_cognitive_load, t5_time_pressure, t6_deflection,
	            t7_sentiment, t8_coherence, t9_latency
	        """
	        features = {}
	        transcript_list = list(transcript_list or [])

	        # Use joined responses if the full transcript is not provided
	        if not full_transcript:
	            full_transcript = " ".join(transcript_list)

	        # T0-T1: Explicit indicators (the classifier result is shared via cache)
	        features['t0_explicit_free'] = self.extract_explicit_free(full_transcript)
	        features['t1_explicit_busy'] = self.extract_explicit_busy(full_transcript)

	        # T2-T3: Response patterns
	        avg_len, short_ratio = self.extract_response_patterns(transcript_list)
	        features['t2_avg_resp_len'] = avg_len
	        features['t3_short_ratio'] = short_ratio

	        # T4-T6: Markers
	        cog_load, time_press, deflect = self.extract_marker_counts(full_transcript)
	        features['t4_cognitive_load'] = cog_load
	        features['t5_time_pressure'] = time_press
	        features['t6_deflection'] = deflect

	        # T7: Sentiment
	        features['t7_sentiment'] = self.extract_sentiment(full_transcript)

	        # T8: Coherence (extract_coherence returns the neutral 0.5 itself
	        # when no question or no responses are available)
	        features['t8_coherence'] = self.extract_coherence(question, transcript_list)

	        # T9: Latency (always 0.0 for single-side audio)
	        features['t9_latency'] = self.extract_latency(events or [])

	        return features


	if __name__ == "__main__":
	    # Manual smoke test: loads the real models, then prints the busy/free
	    # flags for a few sample utterances and a full feature dictionary.
	    print("Initializing Text Feature Extractor...")
	    demo_extractor = TextFeatureExtractor(use_intent_model=True)

	    # Utterances covering plain, negated and implicit busy/free statements
	    sample_utterances = (
	        "I'm driving right now",
	        "I'm not busy at all",
	        "Can't talk, in a meeting",
	        "I can talk now",
	        "Not a good time",
	        "I have time to chat",
	    )

	    print("\nTesting intent classification:")
	    for utterance in sample_utterances:
	        busy_flag = demo_extractor.extract_explicit_busy(utterance)
	        free_flag = demo_extractor.extract_explicit_free(utterance)
	        print(f" '{utterance}'")
	        print(f" → Busy: {busy_flag:.1f}, Free: {free_flag:.1f}")

	    # End-to-end run over a two-turn exchange with an agent question
	    print("\nFull feature extraction:")
	    demo_turns = ["I'm not busy", "I can talk now"]
	    demo_features = demo_extractor.extract_all(
	        transcript_list=demo_turns,
	        full_transcript="I'm not busy. I can talk now.",
	        question="How are you doing today?",
	    )

	    print("\nExtracted features:")
	    for feature_name in demo_features:
	        print(f" {feature_name}: {demo_features[feature_name]:.3f}")