| | |
| |
|
| | |
| | """bigbench datasets""" |
| |
|
| | from __future__ import absolute_import, division, print_function |
| |
|
| | import json |
| | import os |
| | import textwrap |
| | import six |
| | import datasets |
| |
|
| |
|
# BibTeX entry for the BIG-bench paper. Assembled line by line; the author
# line is a raw string so the LaTeX accent escape is kept verbatim.
CITATION = "\n".join(
    [
        "",
        "@article{srivastava2022beyond,",
        "title={Beyond the imitation game: Quantifying and extrapolating the capabilities of language models},",
        r"author={Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R and Santoro, Adam and Gupta, Aditya and Garriga-Alonso, Adri{\`a} and others},",
        "journal={arXiv preprint arXiv:2206.04615},",
        "year={2022}",
        "}",
        "",
    ]
)

# Short human-readable description attached to the dataset info.
DESCRIPTION = "bigbench json tasks\n"

# Zip archive that holds one sub-folder per task configuration.
DATA_URL = "https://www.dropbox.com/s/cjdywlalikdb1c6/bigbench.zip?dl=1"
| |
|
# Names of the available task configurations, one builder config per entry.
# Each name is also used as the task's sub-folder inside the downloaded archive.
CONFIGS = [
    "abstract_narrative_understanding",
    "anachronisms",
    "analogical_similarity",
    "analytic_entailment",
    "arithmetic",
    "ascii_word_recognition",
    "authorship_verification",
    "auto_categorization",
    "auto_debugging",
    "bbq_lite_json",
    "bridging_anaphora_resolution_barqa",
    "causal_judgment",
    "cause_and_effect",
    "checkmate_in_one",
    "chess_state_tracking",
    "chinese_remainder_theorem",
    "cifar10_classification",
    "code_line_description",
    "codenames",
    "color",
    "common_morpheme",
    "conceptual_combinations",
    "conlang_translation",
    "contextual_parametric_knowledge_conflicts",
    "crash_blossom",
    "crass_ai",
    "cryobiology_spanish",
    "cryptonite",
    "cs_algorithms",
    "dark_humor_detection",
    "date_understanding",
    "disambiguation_qa",
    "discourse_marker_prediction",
    "disfl_qa",
    "dyck_languages",
    "elementary_math_qa",
    "emoji_movie",
    "emojis_emotion_prediction",
    "empirical_judgments",
    "english_proverbs",
    "english_russian_proverbs",
    "entailed_polarity",
    "entailed_polarity_hindi",
    "epistemic_reasoning",
    "evaluating_information_essentiality",
    "fact_checker",
    "fantasy_reasoning",
    "few_shot_nlg",
    "figure_of_speech_detection",
    "formal_fallacies_syllogisms_negation",
    "gem",
    "gender_inclusive_sentences_german",
    "general_knowledge",
    "geometric_shapes",
    "goal_step_wikihow",
    "gre_reading_comprehension",
    "hhh_alignment",
    "hindi_question_answering",
    "hindu_knowledge",
    "hinglish_toxicity",
    "human_organs_senses",
    "hyperbaton",
    "identify_math_theorems",
    "identify_odd_metaphor",
    "implicatures",
    "implicit_relations",
    "indic_cause_and_effect",
    "intent_recognition",
    "international_phonetic_alphabet_nli",
    "international_phonetic_alphabet_transliterate",
    "intersect_geometry",
    "irony_identification",
    "kanji_ascii",
    "kannada",
    "key_value_maps",
    "known_unknowns",
    "language_games",
    "language_identification",
    "linguistic_mappings",
    "linguistics_puzzles",
    "list_functions",
    "logic_grid_puzzle",
    "logical_args",
    "logical_deduction",
    "logical_fallacy_detection",
    "logical_sequence",
    "mathematical_induction",
    "matrixshapes",
    "medical_questions_russian",
    "metaphor_boolean",
    "metaphor_understanding",
    "minute_mysteries_qa",
    "misconceptions",
    "misconceptions_russian",
    "mnist_ascii",
    "modified_arithmetic",
    "moral_permissibility",
    "movie_dialog_same_or_different",
    "movie_recommendation",
    "mult_data_wrangling",
    "navigate",
    "nonsense_words_grammar",
    "novel_concepts",
    "object_counting",
    "odd_one_out",
    "operators",
    "paragraph_segmentation",
    "parsinlu_qa",
    "parsinlu_reading_comprehension",
    "penguins_in_a_table",
    "periodic_elements",
    "persian_idioms",
    "phrase_relatedness",
    "physical_intuition",
    "physics",
    "physics_questions",
    "play_dialog_same_or_different",
    "polish_sequence_labeling",
    "presuppositions_as_nli",
    "qa_wikidata",
    "question_selection",
    "real_or_fake_text",
    "reasoning_about_colored_objects",
    "repeat_copy_logic",
    "rephrase",
    "rhyming",
    "riddle_sense",
    "ruin_names",
    "salient_translation_error_detection",
    "scientific_press_release",
    "semantic_parsing_in_context_sparc",
    "semantic_parsing_spider",
    "sentence_ambiguity",
    "similarities_abstraction",
    "simp_turing_concept",
    "simple_arithmetic_json",
    "simple_arithmetic_json_multiple_choice",
    "simple_arithmetic_json_subtasks",
    "simple_arithmetic_multiple_targets_json",
    "simple_ethical_questions",
    "simple_text_editing",
    "snarks",
    "social_iqa",
    "social_support",
    "sports_understanding",
    "strange_stories",
    "strategyqa",
    "sufficient_information",
    "suicide_risk",
    "swahili_english_proverbs",
    "swedish_to_german_proverbs",
    "symbol_interpretation",
    "tellmewhy",
    "temporal_sequences",
    "tense",
    "timedial",
    "topical_chat",
    "tracking_shuffled_objects",
    "understanding_fables",
    "undo_permutation",
    "unit_conversion",
    "unit_interpretation",
    "unnatural_in_context_learning",
    "vitaminc_fact_verification",
    "what_is_the_tao",
    "which_wiki_edit",
    "winowhy",
    "word_sorting",
    "word_unscrambling",
]
| |
|
class bigbench_Config(datasets.BuilderConfig):
    """BuilderConfig for a single bigbench task."""

    def __init__(
        self,
        text_features,
        label_classes=None,
        process_label=lambda x: x,
        **kwargs,
    ):
        """BuilderConfig for bigbench.

        Args:
          text_features: `dict[string, string]`, map from the name of the feature
            dict for each text field to the name of the field in the data file.
          label_classes: optional value stored on the config as given; nothing
            in this file reads it.
          process_label: callable stored on the config as given (identity by
            default); nothing in this file reads it.
          **kwargs: keyword arguments forwarded to `datasets.BuilderConfig`
            (e.g. `name`). `version` is always set to 1.0.0 here, so it must
            not be passed.
        """
        super().__init__(version=datasets.Version("1.0.0", ""), **kwargs)

        self.text_features = text_features
        # Keep the remaining constructor arguments instead of discarding them.
        self.label_classes = label_classes
        self.process_label = process_label
        self.data_url = DATA_URL
        # The builder joins the extracted archive path with `data_dir`, so the
        # config name doubles as the task's sub-folder name.
        self.data_dir = self.name
        self.citation = textwrap.dedent(CITATION)
        # Only fall back to an empty description; do not clobber one that was
        # passed through **kwargs to the base class.
        if self.description is None:
            self.description = ""
        self.url = "https://github.com/google/BIG-bench"
| |
|
| |
|
class bigbench(datasets.GeneratorBasedBuilder):
    """Dataset builder for the bigbench JSON tasks, one configuration per task."""

    BUILDER_CONFIG_CLASS = bigbench_Config

    # One config per task name listed in CONFIGS.
    BUILDER_CONFIGS = [
        bigbench_Config(
            name=name,
            text_features={"inputs": "inputs"},
        )
        for name in CONFIGS
    ]

    def _info(self):
        """Return the `datasets.DatasetInfo` describing the example schema."""
        features = {
            "inputs": datasets.Value("string"),
            "targets": datasets.features.Sequence(datasets.Value("string")),
            "multiple_choice_targets": datasets.features.Sequence(datasets.Value("string")),
            "multiple_choice_scores": datasets.features.Sequence(datasets.Value("int32")),
            "idx": datasets.Value("int32"),
        }
        return datasets.DatasetInfo(
            description=DESCRIPTION,
            features=datasets.Features(features),
            homepage=self.config.url,
            # The config citation already is the (dedented) module citation;
            # appending CITATION again would emit the same BibTeX entry twice.
            citation=self.config.citation,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then declare the train/validation splits.

        Args:
          dl_manager: download manager supplied by `datasets`; only its
            `download_and_extract` method is used.

        Returns:
          A list of two `datasets.SplitGenerator` objects pointing at
          `train.jsonl` and `validation.jsonl` inside the task's folder.
        """
        dl_dir = dl_manager.download_and_extract(self.config.data_url)
        data_dir = os.path.join(dl_dir, self.config.data_dir)

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "data_file": os.path.join(data_dir, "train.jsonl"),
                    "split": "train",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "data_file": os.path.join(data_dir, "validation.jsonl"),
                    "split": "validation",
                },
            ),
        ]

    def _generate_examples(self, data_file, split):
        """Yield `(key, example)` pairs from a JSON-lines file.

        Args:
          data_file: path of the `.jsonl` file to read (UTF-8).
          split: name of the split; accepted because it is passed in
            `gen_kwargs`, but not used here.

        Yields:
          Tuples of the zero-based line index and the object parsed from
          that line.
        """
        with open(data_file, "r", encoding="utf-8") as f:
            for id_, line in enumerate(f):
                # Blank lines (e.g. a trailing empty line) carry no example
                # and would make json.loads raise; keys stay the physical
                # line index so they are unchanged for existing files.
                if not line.strip():
                    continue
                yield id_, json.loads(line)
| |
|