Spaces:
Sleeping
Sleeping
| from typing import Dict, List, Set | |
| import evaluate | |
| from datasets import Features, Sequence, Value | |
| from sklearn.metrics import accuracy_score | |
| from itertools import chain | |
| from random import choice | |
| from typing import Any, Dict, List, Optional, Tuple | |
# Citation text handed to evaluate.MetricInfo(citation=...); it currently
# contains only a newline.
_CITATION = """
"""

# Human-readable summary of the metric, handed to
# evaluate.MetricInfo(description=...).
_DESCRIPTION = """
This module provides evaluation metrics for Aspect-Based Sentiment Analysis (ABSA).
The metrics include precision, recall, and F1 score for both aspect terms and category detection.
Additionally it calculates the accuracy for polarities from aspect terms and category detection.
ABSA evaluates the capability of a model to identify and correctly classify the sentiment of specific aspects within a text.
"""
# Usage documentation handed to evaluate.MetricInfo(inputs_description=...).
# The layout described here mirrors the feature schema declared in
# AbsaEvaluator._info and the way extract_aspects indexes each sample:
# every group ('aspects', 'category') is a dict of parallel lists.
_KWARGS_DESCRIPTION = """
Computes precision, recall, and F1 score for aspect terms and category detection in Aspect-Based Sentiment Analysis (ABSA). Also calculates the accuracy for polarities on each task.
Args:
    predictions: List of ABSA predictions, one dict per sample, with the following structure:
        - 'aspects': Dict of parallel lists describing the aspect terms of the sample:
            - 'term': List of aspect terms
            - 'polarity': List of polarities, one per aspect term
        - 'category': Dict of parallel lists describing the categories of the sample:
            - 'category': List of categories
            - 'polarity': List of polarities, one per category
    references: List of ABSA references with the same structure as predictions.
Examples for predictions:
    [
        {
            "aspects": {
                "term": ["battery life", "camera"],
                "polarity": ["positive", "negative"]
            },
            "category": {
                "category": ["Battery", "Camera"],
                "polarity": ["positive", "negative"]
            }
        }
    ]
Returns:
    term_extraction_results: f1 score, precision and recall for aspect terms (plus the common, retrieved and relevant counts)
    term_polarity_results_accuracy: accuracy for polarities on aspect terms
    category_detection_results: f1 score, precision and recall for category detection (plus the common, retrieved and relevant counts)
    category_polarity_results_accuracy: accuracy for polarities on categories
"""
class AbsaEvaluator(evaluate.Metric):
    """Metric for Aspect-Based Sentiment Analysis (ABSA).

    Reports precision/recall/F1 for aspect-term extraction and category
    detection, plus polarity accuracy for both subtasks.
    """

    def _info(self):
        """Return the metric metadata and the expected input feature schema.

        Both ``predictions`` and ``references`` share the same layout: an
        ``aspects`` group and a ``category`` group, each holding parallel
        string sequences (the labels and their polarities).
        """
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=Features(
                {
                    "predictions": Features(
                        {
                            "aspects": Features(
                                {
                                    "term": Sequence(Value("string")),
                                    "polarity": Sequence(Value("string")),
                                }
                            ),
                            "category": Features(
                                {
                                    "category": Sequence(Value("string")),
                                    "polarity": Sequence(Value("string")),
                                }
                            ),
                        }
                    ),
                    "references": Features(
                        {
                            "aspects": Features(
                                {
                                    "term": Sequence(Value("string")),
                                    "polarity": Sequence(Value("string")),
                                }
                            ),
                            "category": Features(
                                {
                                    "category": Sequence(Value("string")),
                                    "polarity": Sequence(Value("string")),
                                }
                            ),
                        }
                    ),
                }
            ),
        )

    def _compute(self, predictions, references):
        """Compute extraction and polarity scores for both ABSA subtasks.

        Parameters:
        - predictions: per-sample dicts with 'aspects' and 'category' groups.
        - references: per-sample dicts with the same structure.

        Returns:
        - A dictionary with the extraction results (see ``semeval_metric``)
          and the polarity accuracy for aspect terms and for categories.
        """
        term_results, term_polarity_acc = self._evaluate_subtask(
            predictions,
            references,
            subtask_key="aspects",
            subtask_value="term",
        )
        category_results, cat_polarity_acc = self._evaluate_subtask(
            predictions,
            references,
            subtask_key="category",
            subtask_value="category",
        )
        return {
            "term_extraction_results": term_results,
            "term_polarity_results_accuracy": term_polarity_acc,
            "category_detection_results": category_results,
            "category_polarity_results_accuracy": cat_polarity_acc,
        }

    def _evaluate_subtask(
        self, predictions, references, subtask_key, subtask_value
    ):
        """Score one subtask: label extraction (P/R/F1) and polarity accuracy."""
        # semeval_metric compares labels sample by sample, so it needs one
        # list of labels per sample. The flattened lists returned by
        # absa_term_preprocess are plain lists of strings; feeding those to
        # semeval_metric would make it compare individual characters.
        # The lists are copied *before* preprocessing because the padding
        # step may extend the prediction lists it receives.
        truth_labels = [
            list(sample[subtask_key][subtask_value]) for sample in references
        ]
        pred_labels = [
            list(sample[subtask_key][subtask_value]) for sample in predictions
        ]

        # Polarity accuracy needs equally long, position-aligned sequences,
        # which is what the preprocessing step produces.
        _, _, truth_polarities, pred_polarities = absa_term_preprocess(
            references=references,
            predictions=predictions,
            subtask_key=subtask_key,
            subtask_value=subtask_value,
        )

        extraction_results = self.semeval_metric(truth_labels, pred_labels)
        polarity_accuracy = accuracy_score(truth_polarities, pred_polarities)
        return extraction_results, polarity_accuracy

    def semeval_metric(
        self, truths: List[List[str]], preds: List[List[str]]
    ) -> Dict[str, float]:
        """
        Implements evaluation for extraction tasks using precision, recall, and F1 score.

        Parameters:
        - truths: List of lists, where each list contains the ground truth labels for a sample.
        - preds: List of lists, where each list contains the predicted labels for a sample.

        Returns:
        - A dictionary containing the precision, recall, F1 score, and counts of common, retrieved, and relevant.

        Based on: https://github.com/davidsbatista/Aspect-Based-Sentiment-Analysis/blob/1d9c8ec1131993d924e96676fa212db6b53cb870/libraries/baselines.py#L387
        """
        from collections import Counter

        beta = 1
        common, relevant, retrieved = 0.0, 0.0, 0.0
        for truth, pred in zip(truths, preds):
            # Multiset intersection: a label predicted twice only matches as
            # many times as it occurs in the truth, so recall cannot exceed 1.
            overlap = Counter(pred) & Counter(truth)
            common += sum(overlap.values())
            retrieved += len(pred)
            relevant += len(truth)

        precision = common / retrieved if retrieved > 0 else 0.0
        recall = common / relevant if relevant > 0 else 0.0
        if precision > 0 and recall > 0:
            f1 = (
                (1 + beta**2)
                * precision
                * recall
                / ((precision * beta**2) + recall)
            )
        else:
            # Avoids a zero division when nothing was matched.
            f1 = 0.0

        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "common": common,
            "retrieved": retrieved,
            "relevant": relevant,
        }
def adjust_predictions(
    refs: List[List[Any]], preds: List[List[Any]], choices: Set[Any]
) -> List[List[Any]]:
    """Return copies of the predictions resized to the length of their references.

    Args:
        refs: Reference labels, one list per sample.
        preds: Predicted labels, one list per sample. Never modified.
        choices: Values to draw padding from; one is picked at random for
            every missing position.

    Returns:
        A new list with one new list per (ref, pred) pair: predictions longer
        than their reference are truncated, shorter ones are padded.
    """
    choices_list = list(choices)
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        # Slicing both truncates over-long predictions and copies the data,
        # so the caller's lists are left untouched.
        adjusted = list(pred[: len(ref)])
        missing_count = len(ref) - len(adjusted)
        # range() of a non-positive count is empty, so no padding is added
        # when the prediction is already long enough.
        adjusted.extend(choice(choices_list) for _ in range(missing_count))
        adjusted_preds.append(adjusted)
    return adjusted_preds
def extract_aspects(
    data: List[Dict[str, Dict[str, Any]]], specific_key: str, specific_val: str
) -> List[List[Any]]:
    """Collect ``entry[specific_key][specific_val]`` for every entry in ``data``.

    The result keeps the order of ``data`` and holds one value per entry.
    """
    collected = []
    for entry in data:
        group = entry[specific_key]
        collected.append(group[specific_val])
    return collected
def absa_term_preprocess(
    references: List[Dict[str, Any]],
    predictions: List[Dict[str, Any]],
    subtask_key: str,
    subtask_value: str,
) -> Tuple[List[str], List[str], List[str], List[str]]:
    """
    Build flat, length-aligned label and polarity lists for one ABSA subtask.

    Args:
        references (List[Dict]): Gold samples; labels and polarities are read from ``sample[subtask_key]``.
        predictions (List[Dict]): Predicted samples with the same layout as the references.
        subtask_key (str): Group to read from each sample (e.g. 'aspects' or 'category').
        subtask_value (str): Name of the label field inside that group.

    Returns:
        Tuple[List[str], List[str], List[str], List[str]]: The flattened gold labels,
        resized predicted labels, gold polarities, and resized predicted polarities.
    """
    # Per-sample lists for the gold data and for the predictions.
    gold_labels = extract_aspects(references, subtask_key, subtask_value)
    guessed_labels = extract_aspects(predictions, subtask_key, subtask_value)
    gold_sentiments = extract_aspects(references, subtask_key, "polarity")
    guessed_sentiments = extract_aspects(predictions, subtask_key, "polarity")

    # Missing labels are filled with a fixed placeholder; missing polarities
    # are drawn from the polarity values seen in the references.
    placeholder = "NONE"
    observed_sentiments = set(flatten_list(gold_sentiments))

    # Resize every prediction to the length of its reference.
    aligned_labels = adjust_predictions(
        gold_labels, guessed_labels, [placeholder]
    )
    aligned_sentiments = adjust_predictions(
        gold_sentiments, guessed_sentiments, observed_sentiments
    )

    per_sample_groups = (
        gold_labels,
        aligned_labels,
        gold_sentiments,
        aligned_sentiments,
    )
    return tuple(flatten_list(group) for group in per_sample_groups)
def flatten_list(nested_list):
    """Concatenate the items of every inner iterable into one new list."""
    flat = []
    for inner in nested_list:
        flat.extend(inner)
    return flat