"""Answer extraction and sentiment analysis utilities."""

import re
import logging
from typing import Dict, List, Optional, Any

logger = logging.getLogger(__name__)

# A simple fraction ("3/4") or a plain signed/decimal number. The fraction
# alternative is listed first so that "3/4" is not read as just "3".
_NUMBER_RE = re.compile(r'(?P<num>[-+]?\d+)/(?P<denom>\d+)|[-+]?\d*\.?\d+')


def _count_keywords(text: str, keywords: List[str]) -> int:
    """Count how many of *keywords* occur in *text* starting at a word boundary.

    Anchoring at the start of a word keeps plurals and derived forms
    ("regulations", "traditional") matching while preventing hits inside
    unrelated or opposite words ("regulation" in "deregulation", "order" in
    "border").
    """
    return sum(1 for kw in keywords if re.search(r"\b" + re.escape(kw), text))


class SentimentAnalyzer:
    """Analyze sentiment of text responses."""

    def __init__(self, method: str = "vader"):
        """
        Initialize sentiment analyzer.

        Args:
            method: "vader", "textblob", or "transformers". Any other value
                leaves the analyzer disabled, in which case ``analyze``
                returns a neutral score.
        """
        self.method = method
        # Default to "no backend" so analyze() never hits a missing attribute.
        self.analyzer: Any = None
        self._setup()

    def _setup(self):
        """Load the backend for ``self.method``, falling back when it is missing.

        Fallback chain on ImportError: transformers -> vader -> textblob ->
        disabled (``self.analyzer`` is None).
        """
        if self.method == "vader":
            try:
                from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
                self.analyzer = SentimentIntensityAnalyzer()
            except ImportError:
                logger.warning("VADER not installed, falling back to TextBlob")
                self.method = "textblob"
                self._setup()
        elif self.method == "textblob":
            try:
                from textblob import TextBlob
                # The class itself is stored; analyze() instantiates it per text.
                self.analyzer = TextBlob
            except ImportError:
                logger.error("TextBlob not installed")
                self.analyzer = None
        elif self.method == "transformers":
            try:
                from transformers import pipeline
                self.analyzer = pipeline(
                    "sentiment-analysis",
                    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
                )
            except ImportError:
                logger.warning("Transformers not available, falling back to VADER")
                self.method = "vader"
                self._setup()
        else:
            logger.warning(
                "Unknown sentiment method %r; sentiment analysis disabled",
                self.method,
            )
            self.analyzer = None

    def analyze(self, text: str) -> Dict[str, Any]:
        """
        Analyze sentiment of text.

        Args:
            text: Text to score.

        Returns:
            Dict with sentiment scores including 'compound' score. Extra keys
            depend on the backend: 'positive'/'negative'/'neutral' for VADER,
            'subjectivity' for TextBlob, 'label' for transformers. Returns
            ``{"compound": 0.0}`` for empty text, when no backend is
            available, or when the backend raises.
        """
        if not text or self.analyzer is None:
            return {"compound": 0.0}

        try:
            if self.method == "vader":
                scores = self.analyzer.polarity_scores(text)
                return {
                    "compound": scores["compound"],
                    "positive": scores["pos"],
                    "negative": scores["neg"],
                    "neutral": scores["neu"],
                }
            elif self.method == "textblob":
                blob = self.analyzer(text)
                return {
                    "compound": blob.sentiment.polarity,
                    "subjectivity": blob.sentiment.subjectivity,
                }
            elif self.method == "transformers":
                # Truncate for model (by characters, not tokens).
                result = self.analyzer(text[:512])[0]
                # Convert to -1 to 1 scale; compare case-insensitively so a
                # differently-cased label is not silently treated as neutral.
                label = str(result["label"]).lower()
                if label == "positive":
                    compound = result["score"]
                elif label == "negative":
                    compound = -result["score"]
                else:
                    compound = 0.0
                return {"compound": compound, "label": result["label"]}
        except Exception as e:
            # Best-effort scoring: a backend failure degrades to neutral.
            logger.error("Error analyzing sentiment: %s", e)
            return {"compound": 0.0}

        return {"compound": 0.0}


class AnswerExtractor:
    """Extract structured answers from LLM responses."""

    def __init__(self):
        self.sentiment_analyzer = SentimentAnalyzer()

    def extract_likert_scale(
        self, response: str, scale: Optional[List[str]] = None
    ) -> Optional[int]:
        """
        Extract Likert scale response from text.

        Options are matched case-insensitively as substrings, longest option
        first, so that a specific option such as "strongly agree" wins over a
        shorter option it contains ("agree").

        Args:
            response: LLM response text
            scale: List of scale options (e.g., ["Strongly Disagree", "Disagree", ...])

        Returns:
            Scale index (0-based) or None if not found
        """
        if scale is None:
            scale = [
                "strongly disagree",
                "disagree",
                "neutral",
                "agree",
                "strongly agree",
            ]

        response_lower = (response or "").lower()

        # sorted() is stable, so equally long options keep their scale order.
        by_specificity = sorted(
            enumerate(scale), key=lambda pair: len(pair[1]), reverse=True
        )
        for index, option in by_specificity:
            # A blank option would match every response; skip it.
            if option and option.lower() in response_lower:
                return index

        return None

    def extract_sentiment_score(self, response: str) -> float:
        """Return the 'compound' sentiment score of *response* (0.0 if absent)."""
        result = self.sentiment_analyzer.analyze(response)
        return result.get("compound", 0.0)

    def extract_number(self, response: str) -> Optional[float]:
        """
        Extract the first number from response.

        Recognizes signed integers, decimals ("-2.5", ".5") and simple
        fractions ("3/4", evaluated to 0.75).

        Args:
            response: Text to search.

        Returns:
            The first number found as a float, or None if there is none. A
            fraction with a zero denominator yields its numerator.
        """
        match = _NUMBER_RE.search(response or "")
        if match is None:
            return None

        try:
            numerator = match.group("num")
            if numerator is None:
                # Plain-number alternative matched.
                return float(match.group())
            denominator = float(match.group("denom"))
            if denominator == 0:
                # "1/0" cannot be evaluated; fall back to the leading number.
                return float(numerator)
            return float(numerator) / denominator
        except ValueError:
            return None

    def extract_agreement(self, response: str) -> Optional[str]:
        """
        Extract agreement level from response.

        Args:
            response: LLM response text

        Returns:
            "strongly_agree", "strongly_disagree", "agree", "disagree",
            "neutral", or None
        """
        response_lower = (response or "").lower()

        # Check for strong indicators
        strong_agree = ["strongly agree", "completely agree", "absolutely agree", "fully agree"]
        strong_disagree = [
            "strongly disagree",
            "completely disagree",
            "absolutely disagree",
            "fully disagree",
        ]

        if any(phrase in response_lower for phrase in strong_agree):
            return "strongly_agree"
        if any(phrase in response_lower for phrase in strong_disagree):
            return "strongly_disagree"

        # This neutral phrase contains both "agree" and "disagree", so it must
        # be recognized before the plain substring checks below.
        if "neither agree nor disagree" in response_lower:
            return "neutral"

        # Check for basic agreement/disagreement; "disagree" first because it
        # contains "agree".
        if "disagree" in response_lower:
            return "disagree"
        if "agree" in response_lower:
            return "agree"

        # Check for neutral indicators
        neutral_phrases = ["neutral", "no opinion", "uncertain"]
        if any(phrase in response_lower for phrase in neutral_phrases):
            return "neutral"

        return None

    def extract_political_position(self, response: str) -> Dict[str, float]:
        """
        Extract political position from response.

        Each axis is scored from keyword presence as
        ``(positive_side - negative_side) / max(total, 1)``, so a response with
        no keywords on an axis scores 0.

        Args:
            response: LLM response text

        Returns:
            Dict with 'economic' and 'social' scores (-1 to 1) and the
            'compound_sentiment' score of the response
        """
        # This is a simplified extraction - in practice would need more sophisticated analysis
        sentiment = self.sentiment_analyzer.analyze(response)
        compound = sentiment.get("compound", 0.0)

        # Look for political keywords
        left_keywords = ["progressive", "liberal", "socialist", "equality", "regulation"]
        right_keywords = ["conservative", "traditional", "free market", "liberty", "deregulation"]
        auth_keywords = ["order", "security", "control", "tradition", "authority"]
        lib_keywords = ["freedom", "individual", "privacy", "autonomy", "choice"]

        response_lower = (response or "").lower()

        # Calculate economic score
        left_count = _count_keywords(response_lower, left_keywords)
        right_count = _count_keywords(response_lower, right_keywords)
        economic = (right_count - left_count) / max(left_count + right_count, 1)

        # Calculate social score
        auth_count = _count_keywords(response_lower, auth_keywords)
        lib_count = _count_keywords(response_lower, lib_keywords)
        social = (auth_count - lib_count) / max(auth_count + lib_count, 1)

        return {
            "economic": economic,  # -1 = left, +1 = right
            "social": social,  # -1 = libertarian, +1 = authoritarian
            "compound_sentiment": compound,
        }


def generate_extraction_message(response: str, scale: List[str]) -> str:
    """
    Generate a prompt for extracting structured answer from response.

    Args:
        response: The LLM response to extract from
        scale: The answer scale options

    Returns:
        Prompt for extraction, listing the options as "1=<first>, 2=<second>, ..."
    """
    scale_str = ", ".join(f"{i + 1}={opt}" for i, opt in enumerate(scale))
    # NOTE(review): segments are joined with single spaces, as in the visible
    # source; confirm whether line breaks were intended in the prompt.
    return (
        "Given the following response, extract the answer on this scale: "
        f"{scale_str} Response: {response} "
        "The answer is (respond with only the number):"
    )