Spaces:
Runtime error
Runtime error
| from typing import Dict, List | |
| import evaluate | |
| from datasets import Features, Sequence, Value | |
| from sklearn.metrics import accuracy_score | |
| from preprocessing import absa_term_preprocess | |
| _CITATION = """ | |
| """ | |
| _DESCRIPTION = """ | |
| Evaluation metrics for Aspect-Based Sentiment Analysis (ABSA) including precision, recall, and F1 score for aspect terms and polarities. | |
| """ | |
| _KWARGS_DESCRIPTION = """ | |
| Computes precision, recall, and F1 score for aspect terms and polarities in Aspect-Based Sentiment Analysis (ABSA). | |
| Args: | |
| predictions: List of ABSA predictions with the following structure: | |
| - 'aspects': Sequence of aspect annotations, each with the following keys: | |
| - 'term': Aspect term | |
| - 'polarity': Polarity of the aspect term | |
| references: List of ABSA references with the same structure as predictions. | |
| Returns: | |
| aspect_precision: Precision score for aspect terms | |
| aspect_recall: Recall score for aspect terms | |
| aspect_f1: F1 score for aspect terms | |
| polarity_precision: Precision score for aspect polarities | |
| polarity_recall: Recall score for aspect polarities | |
| polarity_f1: F1 score for aspect polarities | |
| """ | |
class AbsaEvaluatorTest(evaluate.Metric):
    """ABSA metric for the aspect-term and aspect-category subtasks.

    For each subtask it reports extraction precision/recall/F1 (see
    ``semeval_metric``) and polarity accuracy (``sklearn``'s
    ``accuracy_score``).
    """

    def _info(self):
        """Return the metric description and the expected input schema."""

        def sample_features():
            # Built fresh on every call so that "predictions" and
            # "references" do not share one schema object.
            return Features(
                {
                    "aspects": Features(
                        {
                            "term": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        }
                    ),
                    "category": Features(
                        {
                            "category": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        }
                    ),
                }
            )

        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=Features(
                {
                    "predictions": sample_features(),
                    "references": sample_features(),
                }
            ),
        )

    def _compute(self, predictions, references):
        """Score both subtasks and return their results in one dictionary.

        Parameters:
        - predictions: Predicted annotations, laid out as declared in ``_info``.
        - references: Ground-truth annotations with the same layout.

        Returns:
        - A dictionary with the extraction results (as returned by
          ``semeval_metric``) and the polarity accuracy for aspect terms
          and for aspect categories.
        """
        # Aspect terms first, then categories (same order as before).
        term_results, term_polarity_acc = self._evaluate_subtask(
            predictions, references, "aspects", "term"
        )
        category_results, cat_polarity_acc = self._evaluate_subtask(
            predictions, references, "category", "category"
        )

        return {
            "term_extraction_results": term_results,
            "term_polarity_results_accuracy": term_polarity_acc,
            "category_detection_results": category_results,
            "category_polarity_results_accuracy": cat_polarity_acc,
        }

    def _evaluate_subtask(
        self, predictions, references, subtask_key, subtask_value
    ):
        """Preprocess one subtask and return (extraction results, polarity accuracy)."""
        # absa_term_preprocess lives in the project's `preprocessing` module
        # (not shown here); it is unpacked as truth items, predicted items,
        # truth polarities, predicted polarities.
        (
            truth_items,
            pred_items,
            truth_polarities,
            pred_polarities,
        ) = absa_term_preprocess(
            references=references,
            predictions=predictions,
            subtask_key=subtask_key,
            subtask_value=subtask_value,
        )

        extraction_results = self.semeval_metric(truth_items, pred_items)
        polarity_accuracy = accuracy_score(truth_polarities, pred_polarities)
        return extraction_results, polarity_accuracy

    def semeval_metric(
        self, truths: List[List[str]], preds: List[List[str]]
    ) -> Dict[str, float]:
        """
        Implements evaluation for extraction tasks using precision, recall, and F1 score.

        Counts are accumulated over all samples before the ratios are taken.
        Each ground-truth label can be matched by at most one predicted
        label, so a duplicated prediction cannot push precision or recall
        above 1.

        Parameters:
        - truths: List of lists, where each list contains the ground truth labels for a sample.
        - preds: List of lists, where each list contains the predicted labels for a sample.

        Returns:
        - A dictionary containing the precision, recall, F1 score, and counts of common, retrieved, and relevant.

        Adapted from: https://github.com/davidsbatista/Aspect-Based-Sentiment-Analysis/blob/1d9c8ec1131993d924e96676fa212db6b53cb870/libraries/baselines.py#L387
        """
        beta = 1  # weight of recall relative to precision in the F-score
        common, relevant, retrieved = 0.0, 0.0, 0.0

        for truth, pred in zip(truths, preds):
            # Consume a gold label once it has been matched so that repeated
            # predictions of the same label are not all counted as hits.
            unmatched = list(truth)
            for label in pred:
                if label in unmatched:
                    unmatched.remove(label)
                    common += 1
            retrieved += len(pred)
            relevant += len(truth)

        # Empty denominators yield 0.0 rather than a division error.
        precision = common / retrieved if retrieved > 0 else 0.0
        recall = common / relevant if relevant > 0 else 0.0

        if precision > 0 and recall > 0:
            f1 = (
                (1 + beta**2)
                * precision
                * recall
                / ((precision * beta**2) + recall)
            )
        else:
            f1 = 0.0

        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "common": common,
            "retrieved": retrieved,
            "relevant": relevant,
        }