Spaces:
Build error
Build error
| """ | |
| RoBERTa-based sentiment analysis for comparing LLM responses | |
| """ | |
| import torch | |
| import numpy as np # ended up not using, but left in case I need it later. | |
| from transformers import RobertaTokenizer, RobertaForSequenceClassification | |
| import nltk | |
| from nltk.tokenize import sent_tokenize | |
# Module-level cache so the tokenizer/model pair is loaded at most once
# per process (populated lazily by load_roberta_model()).
ROBERTA_TOKENIZER = None  # RobertaTokenizer instance, or None until first load
ROBERTA_MODEL = None  # RobertaForSequenceClassification instance, or None until first load
def ensure_nltk_resources():
    """Make sure the NLTK data needed by sent_tokenize is available.

    Downloads are quiet and happen only when the resource is missing.

    NOTE(review): NLTK >= 3.8.2 looks up 'punkt_tab' (not 'punkt') for
    sentence tokenization, so checking only 'punkt' raises LookupError
    there; check both so either NLTK version works.
    """
    for lookup_path, download_name in (
        ('tokenizers/punkt', 'punkt'),
        ('tokenizers/punkt_tab', 'punkt_tab'),
    ):
        try:
            nltk.data.find(lookup_path)
        except LookupError:
            nltk.download(download_name, quiet=True)
def load_roberta_model():
    """
    Load (and cache) the RoBERTa MNLI model and its matching tokenizer.

    Returns:
        tuple: (tokenizer, model) for RoBERTa sentiment analysis, or
               (None, None) if loading fails.
    """
    global ROBERTA_TOKENIZER, ROBERTA_MODEL
    # Return cached pair if already loaded
    if ROBERTA_TOKENIZER is not None and ROBERTA_MODEL is not None:
        return ROBERTA_TOKENIZER, ROBERTA_MODEL
    print("Loading RoBERTa model and tokenizer...")
    try:
        # BUG FIX: the tokenizer must come from the same checkpoint as the
        # model. The original loaded the tokenizer from 'roberta-base'
        # while the model is 'roberta-large-mnli'; loading both from the
        # MNLI checkpoint guarantees vocab/config consistency.
        ROBERTA_TOKENIZER = RobertaTokenizer.from_pretrained('roberta-large-mnli')
        ROBERTA_MODEL = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli')
        return ROBERTA_TOKENIZER, ROBERTA_MODEL
    except Exception as e:
        print(f"Error loading RoBERTa model: {str(e)}")
        # Reset both globals so a half-loaded state is never cached and a
        # later call can retry cleanly.
        ROBERTA_TOKENIZER = None
        ROBERTA_MODEL = None
        return None, None
def _label_for(score):
    """Map a numeric sentiment score to a coarse label (thresholds +/-0.5)."""
    if score > 0.5:
        return "positive"
    if score < -0.5:
        return "negative"
    return "neutral"


def _score_text(text, tokenizer, model, device):
    """Run one text through the MNLI head.

    Returns:
        tuple: (contradiction, neutral, entailment) softmax probabilities.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    encoded = {k: v.to(device) for k, v in encoded.items()}
    with torch.no_grad():
        outputs = model(**encoded)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return (
        predictions[0, 0].item(),
        predictions[0, 1].item(),
        predictions[0, 2].item(),
    )


def _fallback_result(error=None):
    """Uniform neutral result used for empty input and all failure paths."""
    result = {
        "label": "neutral",
        "scores": {
            "contradiction": 0.33,
            "neutral": 0.34,
            "entailment": 0.33
        },
        "sentiment_score": 0.0,
        # Always present so callers can iterate it unconditionally
        # (the original error paths omitted this key).
        "sentence_scores": []
    }
    if error is not None:
        result["error"] = error
    return result


def analyze_sentiment_roberta(text):
    """
    Analyze sentiment using the RoBERTa MNLI model.

    The MNLI class probabilities are projected onto a sentiment axis:
    contradiction acts as "negative", entailment as "positive".

    Args:
        text (str): Text to analyze

    Returns:
        dict: {"label", "scores", "sentiment_score", "sentence_scores"},
              plus an "error" key on failure.
    """
    ensure_nltk_resources()
    # Empty / whitespace-only input -> neutral placeholder
    if not text or not text.strip():
        return _fallback_result()
    # Load model (cached after first call)
    tokenizer, model = load_roberta_model()
    if tokenizer is None or model is None:
        return _fallback_result("Failed to load RoBERTa model")
    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)
        # Score the whole text once
        contradiction_score, neutral_score, entailment_score = _score_text(
            text, tokenizer, model, device)
        # entailment - contradiction is in [-1, 1]; doubled to span [-2, 2]
        sentiment_score = (entailment_score - contradiction_score) * 2
        label = _label_for(sentiment_score)
        # Per-sentence breakdown, only for substantial multi-sentence inputs
        sentences = sent_tokenize(text)
        sentence_scores = []
        if len(sentences) > 1 and len(text) > 100:
            for sentence in sentences:
                # Skip trivial fragments (< 3 words)
                if len(sentence.split()) < 3:
                    continue
                sent_contradiction, sent_neutral, sent_entailment = _score_text(
                    sentence, tokenizer, model, device)
                sent_score = (sent_entailment - sent_contradiction) * 2
                sentence_scores.append({
                    "text": sentence,
                    "score": sent_score,
                    "label": _label_for(sent_score),
                    "scores": {
                        "contradiction": sent_contradiction,
                        "neutral": sent_neutral,
                        "entailment": sent_entailment
                    }
                })
        return {
            "label": label,
            "scores": {
                "contradiction": contradiction_score,
                "neutral": neutral_score,
                "entailment": entailment_score
            },
            "sentiment_score": sentiment_score,
            "sentence_scores": sentence_scores
        }
    except Exception as e:
        import traceback
        print(f"Error analyzing sentiment with RoBERTa: {str(e)}")
        print(traceback.format_exc())
        return _fallback_result(str(e))
def compare_sentiment_roberta(texts, model_names=None):
    """
    Compare RoBERTa sentiment between two texts.

    Args:
        texts (list[str]): Candidate responses; at least two are required.
        model_names (list[str] | None): Display names; replaced with
            ["Model 1", "Model 2"] when missing or shorter than two.

    Returns:
        dict: Per-model sentiment results plus a pairwise "comparison" of
              the first two texts, or a dict with an "error" entry when
              fewer than two texts are supplied.
    """
    print(f"Starting sentiment comparison for {len(texts)} texts")
    # Substitute generic names when none (or too few) were given
    if model_names is None or len(model_names) < 2:
        model_names = ["Model 1", "Model 2"]
    names = model_names[:len(texts)]
    # Guard clause: nothing to compare with fewer than two texts
    if len(texts) < 2:
        return {
            "error": "Need at least 2 texts to compare",
            "models": names
        }
    # One analysis per text, paired with its model name
    analyses = [analyze_sentiment_roberta(t) for t in texts]
    result = {
        "models": names,
        "sentiment_analysis": dict(zip(names, analyses)),
    }
    first_name, second_name = model_names[0], model_names[1]

    def _score_of(analysis):
        # Defensive: tolerate a missing/None analysis or absent score key
        if analysis and "sentiment_score" in analysis:
            return analysis["sentiment_score"]
        return 0

    score_a = _score_of(analyses[0])
    score_b = _score_of(analyses[1])
    gap = abs(score_a - score_b)
    comparison = {
        "sentiment_difference": gap,
        "significant_difference": gap > 0.5,  # threshold for a meaningful gap
    }
    if score_a > score_b:
        comparison["more_positive"] = first_name
        comparison["more_negative"] = second_name
        comparison["difference_direction"] = f"{first_name} is more positive than {second_name}"
    elif score_b > score_a:
        comparison["more_positive"] = second_name
        comparison["more_negative"] = first_name
        comparison["difference_direction"] = f"{second_name} is more positive than {first_name}"
    else:
        comparison["equal_sentiment"] = True
        comparison["difference_direction"] = f"{first_name} and {second_name} have similar sentiment"
    result["comparison"] = comparison
    return result