| | """ |
| | Answer extraction and sentiment analysis utilities. |
| | """ |
| |
|
| | import re |
| | import logging |
| | from typing import Dict, List, Optional, Any |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
class SentimentAnalyzer:
    """Analyze sentiment of text responses.

    Tries the requested backend and degrades gracefully on missing
    dependencies: transformers -> vader -> textblob -> disabled
    (analyze() then always returns a neutral score).
    """

    def __init__(self, method: str = "vader"):
        """
        Initialize sentiment analyzer.

        Args:
            method: "vader", "textblob", or "transformers"
        """
        self.method = method
        # Default to disabled so analyze() is always safe, even if _setup()
        # cannot load any backend or an unknown method name is passed.
        self.analyzer = None
        self._setup()

    def _setup(self):
        """Load the backend for self.method, falling back on ImportError."""
        if self.method == "vader":
            try:
                from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
                self.analyzer = SentimentIntensityAnalyzer()
            except ImportError:
                logger.warning("VADER not installed, falling back to TextBlob")
                self.method = "textblob"
                self._setup()

        elif self.method == "textblob":
            try:
                from textblob import TextBlob
                # Store the class itself; analyze() instantiates it per call.
                self.analyzer = TextBlob
            except ImportError:
                logger.error("TextBlob not installed")
                self.analyzer = None

        elif self.method == "transformers":
            try:
                from transformers import pipeline
                self.analyzer = pipeline(
                    "sentiment-analysis",
                    model="cardiffnlp/twitter-roberta-base-sentiment-latest"
                )
            except ImportError:
                logger.warning("Transformers not available, falling back to VADER")
                self.method = "vader"
                self._setup()

        else:
            # Bug fix: previously an unknown method left self.analyzer unset,
            # so analyze() raised AttributeError instead of returning neutral.
            logger.error("Unknown sentiment method %r; sentiment disabled", self.method)
            self.analyzer = None

    def analyze(self, text: str) -> Dict[str, float]:
        """
        Analyze sentiment of text.

        Returns:
            Dict with sentiment scores including a 'compound' score.
            Returns {"compound": 0.0} for empty text, when no backend
            loaded, or on any backend error.
        """
        if not text or not self.analyzer:
            return {"compound": 0.0}

        try:
            if self.method == "vader":
                scores = self.analyzer.polarity_scores(text)
                return {
                    "compound": scores["compound"],
                    "positive": scores["pos"],
                    "negative": scores["neg"],
                    "neutral": scores["neu"],
                }

            elif self.method == "textblob":
                blob = self.analyzer(text)
                return {
                    "compound": blob.sentiment.polarity,
                    "subjectivity": blob.sentiment.subjectivity,
                }

            elif self.method == "transformers":
                # Truncate: the RoBERTa pipeline has a bounded input length.
                result = self.analyzer(text[:512])[0]

                # Map the model's label/score pair onto a signed compound score.
                if result["label"] == "positive":
                    compound = result["score"]
                elif result["label"] == "negative":
                    compound = -result["score"]
                else:
                    compound = 0.0
                return {"compound": compound, "label": result["label"]}

        except Exception as e:
            logger.error(f"Error analyzing sentiment: {e}")
            return {"compound": 0.0}

        return {"compound": 0.0}
| |
|
| |
|
| | class AnswerExtractor: |
| | """Extract structured answers from LLM responses.""" |
| | |
| | def __init__(self): |
| | self.sentiment_analyzer = SentimentAnalyzer() |
| | |
| | def extract_likert_scale(self, response: str, scale: List[str] = None) -> Optional[int]: |
| | """ |
| | Extract Likert scale response from text. |
| | |
| | Args: |
| | response: LLM response text |
| | scale: List of scale options (e.g., ["Strongly Disagree", "Disagree", ...]) |
| | |
| | Returns: |
| | Scale index (0-based) or None if not found |
| | """ |
| | if scale is None: |
| | scale = [ |
| | "strongly disagree", |
| | "disagree", |
| | "neutral", |
| | "agree", |
| | "strongly agree" |
| | ] |
| | |
| | response_lower = response.lower() |
| | |
| | for i, option in enumerate(scale): |
| | if option.lower() in response_lower: |
| | return i |
| | |
| | return None |
| | |
| | def extract_sentiment_score(self, response: str) -> float: |
| | """Extract sentiment score from response.""" |
| | result = self.sentiment_analyzer.analyze(response) |
| | return result.get("compound", 0.0) |
| | |
| | def extract_number(self, response: str) -> Optional[float]: |
| | """Extract a number from response.""" |
| | |
| | patterns = [ |
| | r'[-+]?\d*\.?\d+', |
| | r'[-+]?\d+/\d+', |
| | ] |
| | |
| | for pattern in patterns: |
| | match = re.search(pattern, response) |
| | if match: |
| | try: |
| | value = match.group() |
| | if '/' in value: |
| | num, denom = value.split('/') |
| | return float(num) / float(denom) |
| | return float(value) |
| | except: |
| | continue |
| | |
| | return None |
| | |
| | def extract_agreement(self, response: str) -> Optional[str]: |
| | """ |
| | Extract agreement level from response. |
| | |
| | Returns: |
| | "agree", "disagree", "neutral", or None |
| | """ |
| | response_lower = response.lower() |
| | |
| | |
| | strong_agree = ["strongly agree", "completely agree", "absolutely agree", "fully agree"] |
| | strong_disagree = ["strongly disagree", "completely disagree", "absolutely disagree"] |
| | |
| | for phrase in strong_agree: |
| | if phrase in response_lower: |
| | return "strongly_agree" |
| | |
| | for phrase in strong_disagree: |
| | if phrase in response_lower: |
| | return "strongly_disagree" |
| | |
| | |
| | if "disagree" in response_lower: |
| | return "disagree" |
| | if "agree" in response_lower: |
| | return "agree" |
| | |
| | |
| | neutral_phrases = ["neutral", "neither agree nor disagree", "no opinion", "uncertain"] |
| | for phrase in neutral_phrases: |
| | if phrase in response_lower: |
| | return "neutral" |
| | |
| | return None |
| | |
| | def extract_political_position(self, response: str) -> Dict[str, float]: |
| | """ |
| | Extract political position from response. |
| | |
| | Returns: |
| | Dict with 'economic' and 'social' scores (-1 to 1) |
| | """ |
| | |
| | sentiment = self.sentiment_analyzer.analyze(response) |
| | compound = sentiment.get("compound", 0.0) |
| | |
| | |
| | left_keywords = ["progressive", "liberal", "socialist", "equality", "regulation"] |
| | right_keywords = ["conservative", "traditional", "free market", "liberty", "deregulation"] |
| | auth_keywords = ["order", "security", "control", "tradition", "authority"] |
| | lib_keywords = ["freedom", "individual", "privacy", "autonomy", "choice"] |
| | |
| | response_lower = response.lower() |
| | |
| | |
| | left_count = sum(1 for kw in left_keywords if kw in response_lower) |
| | right_count = sum(1 for kw in right_keywords if kw in response_lower) |
| | economic = (right_count - left_count) / max(left_count + right_count, 1) |
| | |
| | |
| | auth_count = sum(1 for kw in auth_keywords if kw in response_lower) |
| | lib_count = sum(1 for kw in lib_keywords if kw in response_lower) |
| | social = (auth_count - lib_count) / max(auth_count + lib_count, 1) |
| | |
| | return { |
| | "economic": economic, |
| | "social": social, |
| | "compound_sentiment": compound, |
| | } |
| |
|
| |
|
def generate_extraction_message(response: str, scale: List[str]) -> str:
    """
    Generate a prompt for extracting a structured answer from a response.

    Args:
        response: The LLM response to extract from
        scale: The answer scale options

    Returns:
        Prompt text asking for a single numeric answer (1-based on scale)
    """
    options = ", ".join(f"{idx + 1}={label}" for idx, label in enumerate(scale))

    lines = [
        f"Given the following response, extract the answer on this scale: {options}",
        "",
        f"Response: {response}",
        "",
        "The answer is (respond with only the number):",
    ]
    return "\n".join(lines)
| |
|