Spaces:
Runtime error
Runtime error
| from itertools import chain | |
| from random import choice | |
| from typing import Any, Dict, List, Optional, Tuple | |
| from datasets import Dataset | |
def adjust_predictions(refs, preds, choices):
    """Pad each prediction list so it is at least as long as its reference.

    References and predictions are paired by position with ``zip``, so any
    surplus entries in the longer of ``refs`` / ``preds`` are ignored.

    Args:
        refs: Sequence of reference label lists.
        preds: Sequence of predicted label lists, aligned with ``refs``.
        choices: Non-empty sequence of filler values; one is drawn with
            ``random.choice`` for every missing slot.

    Returns:
        A new list holding one new list per (ref, pred) pair. Predictions
        shorter than their reference are padded up to the reference length;
        predictions of equal or greater length are copied unchanged.
    """
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        # Work on a copy: the incoming lists usually live inside the caller's
        # prediction dicts, and padding them in place would silently write
        # filler values into that data.
        padded = list(pred)
        missing_count = len(ref) - len(padded)
        # range() of a non-positive count is empty, so nothing is appended
        # when the prediction is already long enough.
        padded.extend(choice(choices) for _ in range(missing_count))
        adjusted_preds.append(padded)
    return adjusted_preds
def extract_aspects(data, specific_key, specific_val):
    """Collect ``item[specific_key][specific_val]`` for every item in ``data``.

    Args:
        data: Iterable of nested mappings.
        specific_key: Outer key looked up on each item.
        specific_val: Inner key looked up on the value found under
            ``specific_key``.

    Returns:
        A list with one extracted value per item, in input order.
    """
    collected = []
    for entry in data:
        section = entry[specific_key]
        collected.append(section[specific_val])
    return collected
def absa_term_preprocess(references, predictions, subtask_key, subtask_value):
    """
    Build flat, aligned label lists for aspect-based sentiment analysis.

    Args:
        references (List[Dict]): Ground-truth items; each is indexed as
            ``item[subtask_key][subtask_value]`` and
            ``item[subtask_key]["polarity"]``.
        predictions (List[Dict]): Predicted items, indexed the same way.
        subtask_key: Outer key selecting the sub-task section of each item.
        subtask_value: Inner key selecting the label list inside that section.

    Returns:
        Tuple: four flattened lists, in order: true labels, padded predicted
        labels, true polarities, padded predicted polarities.
    """
    # Fillers used when a prediction has fewer entries than its reference.
    missing_term_filler = ["NONE"]
    polarity_fillers = ["positive", "negative", "neutral", "conflict"]

    gold_terms = extract_aspects(references, subtask_key, subtask_value)
    predicted_terms = extract_aspects(predictions, subtask_key, subtask_value)
    gold_polarities = extract_aspects(references, subtask_key, "polarity")
    predicted_polarities = extract_aspects(predictions, subtask_key, "polarity")

    # Terms are padded before polarities; both calls draw from the random
    # module, so the order is kept stable for seeded runs.
    padded_terms = adjust_predictions(
        gold_terms, predicted_terms, missing_term_filler
    )
    padded_polarities = adjust_predictions(
        gold_polarities, predicted_polarities, polarity_fillers
    )

    columns = (gold_terms, padded_terms, gold_polarities, padded_polarities)
    return tuple(flatten_list(column) for column in columns)
def flatten_list(nested_list):
    """Return the elements of every inner iterable as one single-level list."""
    return [element for inner in nested_list for element in inner]
def extract_pred_terms(
    all_predictions: List[Dict[str, Dict[str, str]]]
) -> List[List]:
    """Return, for each prediction, the keys of all its inner dictionaries.

    Each prediction is a mapping whose values are themselves mappings; the
    inner keys are gathered in iteration order into one list per prediction.
    """
    return [
        [term for inner in prediction.values() for term in inner]
        for prediction in all_predictions
    ]
def merge_aspects_and_categories(aspects, categories):
    """Combine per-sample aspect and category predictions into one record each.

    ``aspects`` and ``categories`` are paired by position with ``zip``; if the
    two differ in length the surplus entries are ignored.

    Args:
        aspects: Sequence of mappings ``{category_key: {term: polarity}}``.
        categories: Sequence of mappings ``{category_key: polarity}``.

    Returns:
        A list with one dict per pair, shaped as::

            {
                "aspects": {"term": [...], "polarity": [...]},
                "category": {"category": [...], "polarity": [...]},
            }

        Within each sub-dict the two lists are index-aligned. Every category
        key appears exactly once: keys shared with the aspect mapping come
        first (in aspect order), followed by the remaining category keys.
    """
    result = []

    for aspect, category in zip(aspects, categories):
        terms = []
        term_polarities = []
        # Insertion-ordered mapping guarantees one entry per category key,
        # no matter how many terms a category contains.
        category_polarities = {}

        for cat_key, terms_dict in aspect.items():
            for term, polarity in terms_dict.items():
                terms.append(term)
                term_polarities.append(polarity)

            # Record the category once per key rather than once per term, so
            # the category list never contains duplicates.
            if cat_key in category:
                category_polarities[cat_key] = category[cat_key]

        # Categories that have no aspect terms still need to be reported.
        for cat_key, polarity in category.items():
            category_polarities.setdefault(cat_key, polarity)

        result.append(
            {
                "aspects": {"term": terms, "polarity": term_polarities},
                "category": {
                    "category": list(category_polarities),
                    "polarity": list(category_polarities.values()),
                },
            }
        )

    return result