veryfansome committed on Jun 11, 2025

Commit

0dfbd20

1 Parent(s): b249cec

Big cleanup

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

conll2012_dataset_maker.py +0 -120
dataset/o3-mini_20250218/data-00000-of-00001.arrow +0 -3
dataset/o3-mini_20250218/dataset_info.json +0 -110
dataset/o3-mini_20250218/state.json +0 -13
dataset/ud_augmented_1st_pass/dataset_dict.json +0 -1
dataset/ud_augmented_1st_pass/test/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_1st_pass/test/dataset_info.json +0 -166
dataset/ud_augmented_1st_pass/test/state.json +0 -37
dataset/ud_augmented_1st_pass/train/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_1st_pass/train/dataset_info.json +0 -166
dataset/ud_augmented_1st_pass/train/state.json +0 -37
dataset/ud_augmented_1st_pass/validation/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_1st_pass/validation/dataset_info.json +0 -166
dataset/ud_augmented_1st_pass/validation/state.json +0 -37
dataset/ud_augmented_jj_rb_types_20250320/dataset_dict.json +0 -1
dataset/ud_augmented_jj_rb_types_20250320/test/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_jj_rb_types_20250320/test/dataset_info.json +0 -222
dataset/ud_augmented_jj_rb_types_20250320/test/state.json +0 -45
dataset/ud_augmented_jj_rb_types_20250320/train/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_jj_rb_types_20250320/train/dataset_info.json +0 -222
dataset/ud_augmented_jj_rb_types_20250320/train/state.json +0 -45
dataset/ud_augmented_jj_rb_types_20250320/validation/data-00000-of-00001.arrow +0 -3
dataset/ud_augmented_jj_rb_types_20250320/validation/dataset_info.json +0 -222
dataset/ud_augmented_jj_rb_types_20250320/validation/state.json +0 -45
dataset/ud_transform_only_20250317/dataset_dict.json +0 -1
dataset/ud_transform_only_20250317/test/data-00000-of-00001.arrow +0 -3
dataset/ud_transform_only_20250317/test/dataset_info.json +0 -222
dataset/ud_transform_only_20250317/test/state.json +0 -45
dataset/ud_transform_only_20250317/train/data-00000-of-00001.arrow +0 -3
dataset/ud_transform_only_20250317/train/dataset_info.json +0 -222
dataset/ud_transform_only_20250317/train/state.json +0 -45
dataset/ud_transform_only_20250317/validation/data-00000-of-00001.arrow +0 -3
dataset/ud_transform_only_20250317/validation/dataset_info.json +0 -222
dataset/ud_transform_only_20250317/validation/state.json +0 -45
dataset_splitter.py +0 -43
goemotions_predict.py +0 -63
llama_dataset_maker.py +0 -194
models/conll2012_en12_20250305/added_tokens.json +0 -3
models/conll2012_en12_20250305/config.json +0 -135
models/conll2012_en12_20250305/model.safetensors +0 -3
models/conll2012_en12_20250305/special_tokens_map.json +0 -15
models/conll2012_en12_20250305/spm.model +0 -3
models/conll2012_en12_20250305/tokenizer.json +0 -0
models/conll2012_en12_20250305/tokenizer_config.json +0 -60
models/conll2012_en12_20250305/training_args.bin +0 -3
models/ud_augmented_jj_rb_types_20250320_v2/README.md +0 -0
models/ud_augmented_jj_rb_types_20250320_v2/added_tokens.json +0 -3
models/ud_augmented_jj_rb_types_20250320_v2/config.json +0 -388
models/ud_augmented_jj_rb_types_20250320_v2/model.safetensors +0 -3
models/ud_augmented_jj_rb_types_20250320_v2/special_tokens_map.json +0 -51

conll2012_dataset_maker.py DELETED Viewed

@@ -1,120 +0,0 @@
-from datasets import load_dataset, DatasetDict
-import argparse
-import logging
-from utils import default_logging_config, get_uniq_training_labels, show_examples
-logger = logging.getLogger(__name__)
-allowed_pos = {'``', '$', "''", ',', '-LRB-', '-RRB-', '.', ':', 'ADD', 'CC', 'CD', 'DT', 'EX',
-               'FW', 'HYPH', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NFP', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS',
-               'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
-               'WDT', 'WP', 'WP$', 'WRB'}
-allowed_ner = {'O', 'B-PERSON', 'I-PERSON', 'B-NORP', 'I-NORP', 'B-FAC', 'I-FAC', 'B-ORG', 'I-ORG', 'B-GPE', 'I-GPE',
-               'B-LOC', 'I-LOC', 'B-PRODUCT', 'I-PRODUCT', 'B-DATE', 'I-DATE', 'B-TIME', 'I-TIME', 'B-PERCENT',
-               'I-PERCENT', 'B-MONEY', 'I-MONEY', 'B-QUANTITY', 'I-QUANTITY', 'B-ORDINAL', 'I-ORDINAL',
-               'B-CARDINAL', 'I-CARDINAL', 'B-EVENT', 'I-EVENT', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'B-LAW', 'I-LAW',
-               'B-LANGUAGE', 'I-LANGUAGE'}
-def is_valid_example(exp):
-    """
-    Simple filter that checks if all pos_tags are in allowed_pos
-    and all ner_tags are in allowed_ner. If you do not want any
-    filtering, simply return True.
-    """
-    # You can skip filtering by just returning True:
-    # return True
-    # If your dataset has multiple tokens with possibly different tags,
-    # check them all:
-    for pos_tag in exp["pos_tags"]:
-        if pos_tag not in allowed_pos:
-            return False
-    for ner_tag in exp["ner_tags"]:
-        if ner_tag not in allowed_ner:
-            return False
-    return True
-def transform_and_filter_dataset(onto_ds):
-    """
-    onto_ds is a DatasetDict with splits: 'train', 'validation', 'test', etc.
-    Return a new DatasetDict with the same splits but:
-      - Filter out unwanted examples
-      - Possibly rename or remove columns
-      - Possibly introduce new columns
-    """
-    pos_tag_int2str = onto_ds["train"].features["sentences"][0]["pos_tags"].feature.names
-    ner_tag_int2str = onto_ds["train"].features["sentences"][0]["named_entities"].feature.names
-    def flatten_ontonotes(batch):
-        out = {
-            "tokens": [],
-            "ner_tags": [],
-            "pos_tags": [],
-            "verb_predicate": [],
-        }
-        for doc_id, sents in zip(batch["document_id"], batch["sentences"]):
-            for sent_info in sents:
-                out["tokens"].append(sent_info["words"])
-                out["ner_tags"].append([ner_tag_int2str[i] for i in sent_info["named_entities"]])
-                out["pos_tags"].append([pos_tag_int2str[i] for i in sent_info["pos_tags"]])
-                out["verb_predicate"].append([("Yes" if s else "O") for s in sent_info["predicate_lemmas"]])
-        return out
-    new_splits = {}
-    for split_name, split_ds in onto_ds.items():
-        # Flatten
-        flattened_ds = split_ds.map(
-            flatten_ontonotes,
-            batched=True,
-            remove_columns=["sentences", "document_id"],  # remove old columns
-        )
-        # Filter out invalid examples
-        filtered_split = flattened_ds.filter(is_valid_example)
-        new_splits[split_name] = filtered_split
-    return DatasetDict(new_splits)
-# ------------------------------------------------------------------------------
-# 6) Main Script
-# ------------------------------------------------------------------------------
-if __name__ == "__main__":
-    import logging.config
-    arg_parser = argparse.ArgumentParser(description="Process OntoNotes CoNLL-2012 (English).")
-    arg_parser.add_argument("--log-level", help="Log level.", action="store",
-                            default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
-    arg_parser.add_argument("--save", help="Save final dataset to disk.", action="store_true", default=False)
-    arg_parser.add_argument("--save-path", help="Where to save final dataset.", default="./conll2012_en12_training_data")
-    arg_parser.add_argument("--show", help="Show examples: <split>/<col>/<label>/<count>", default=None)
-    args = arg_parser.parse_args()
-    logging.config.dictConfig(default_logging_config)
-    logger.setLevel(args.log_level)
-    # 6a) Load OntoNotes (English) from the 'conll2012_ontonotesv5' script
-    #     This usually yields "train", "validation", "test" splits.
-    ontonotes_ds = load_dataset("conll2012_ontonotesv5", "english_v12")
-    logger.info(f"Splits loaded: {ontonotes_ds}")
-    # 6b) Transform & Filter
-    final_dataset = transform_and_filter_dataset(ontonotes_ds)
-    # 6d) Show examples if user requested
-    show_examples(final_dataset, args.show)
-    # 6e) Log unique training labels (POS/NER) if you like
-    get_uniq_training_labels(final_dataset)
-    # 6f) Save to disk if requested
-    if args.save:
-        final_dataset.save_to_disk(args.save_path)
-        logger.info("Saved dataset to %s", args.save_path)

dataset/o3-mini_20250218/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d78eaab378f462e45adbf026f13c3d5b9a289ddea75d399dbbf0c12b0c25c11e
-size 40179808

dataset/o3-mini_20250218/dataset_info.json DELETED Viewed

@@ -1,110 +0,0 @@
-{
-  "citation": "",
-  "description": "",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "adj": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "adv": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "det": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "enc": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "func": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "misc": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ner1": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ner2": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "noun": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pronoun": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "punct": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "verb": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "wh": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "",
-  "license": ""
-}

dataset/o3-mini_20250218/state.json DELETED Viewed

@@ -1,13 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "4a79c58b9023cf85",
-  "_format_columns": null,
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_1st_pass/dataset_dict.json DELETED Viewed

@@ -1 +0,0 @@
-{"splits": ["test", "train", "validation"]}

dataset/ud_augmented_1st_pass/test/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:72ff4b6e757e35c614da44f3b7c13fbde97c3411ddb9417bf9dc3586b82d8a13
-size 7444864

dataset/ud_augmented_1st_pass/test/dataset_info.json DELETED Viewed

@@ -1,166 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Emotion": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerLocation": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerOrganization": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerPerson": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_1st_pass/test/state.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "9e6189ebb4d00723",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "Emotion",
-    "AdjType",
-    "NerLocation",
-    "NerOrganization",
-    "NerPerson"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_1st_pass/train/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:db9cdfd62c504e0d4ed6ecc5ca28a067785440611e7915f0a42997e71e8cb27b
-size 30680400

dataset/ud_augmented_1st_pass/train/dataset_info.json DELETED Viewed

@@ -1,166 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Emotion": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerLocation": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerOrganization": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerPerson": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_1st_pass/train/state.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "ad7a0ecbd5effa06",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "Emotion",
-    "AdjType",
-    "NerLocation",
-    "NerOrganization",
-    "NerPerson"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_1st_pass/validation/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ae4a4bf4daf5bcee6e968dcdef9b2d303d781bfac1b8323a20072c53aff7b03
-size 4514168

dataset/ud_augmented_1st_pass/validation/dataset_info.json DELETED Viewed

@@ -1,166 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Emotion": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerLocation": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerOrganization": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NerPerson": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_1st_pass/validation/state.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "9ba8b77501d6e582",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "Emotion",
-    "AdjType",
-    "NerLocation",
-    "NerOrganization",
-    "NerPerson"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_jj_rb_types_20250320/dataset_dict.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"splits": ["test", "train", "validation"]}

dataset/ud_augmented_jj_rb_types_20250320/test/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:febd70182c55d126b9faaa6572684cf25ef41fa4191aa59e26d0789985bb9ec7
-size 9567448

dataset/ud_augmented_jj_rb_types_20250320/test/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "MdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjGrad": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjPos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_jj_rb_types_20250320/test/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "0172d8a8a781517f",
-  "_format_columns": [
-    "AdjGrad",
-    "AdjHead",
-    "AdjPos",
-    "AdjType",
-    "AdvHead",
-    "AdvType",
-    "Case",
-    "CdHead",
-    "ConjHead",
-    "Definite",
-    "Degree",
-    "DetHead",
-    "Gender",
-    "InHead",
-    "MdHead",
-    "Mood",
-    "NounHead",
-    "NumType",
-    "Number",
-    "Person",
-    "PronHead",
-    "PronType",
-    "Tense",
-    "VerbForm",
-    "VerbHead",
-    "WhHead",
-    "deprel",
-    "pos",
-    "text",
-    "tokens",
-    "xpos"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_jj_rb_types_20250320/train/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6c0bcd7cd11b46e27a48db9579fb9c71767971617c690f53cc5535c6e4cc39c4
-size 38694752

dataset/ud_augmented_jj_rb_types_20250320/train/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "MdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjGrad": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjPos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_jj_rb_types_20250320/train/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "e4c987937dbd5ac4",
-  "_format_columns": [
-    "AdjGrad",
-    "AdjHead",
-    "AdjPos",
-    "AdjType",
-    "AdvHead",
-    "AdvType",
-    "Case",
-    "CdHead",
-    "ConjHead",
-    "Definite",
-    "Degree",
-    "DetHead",
-    "Gender",
-    "InHead",
-    "MdHead",
-    "Mood",
-    "NounHead",
-    "NumType",
-    "Number",
-    "Person",
-    "PronHead",
-    "PronType",
-    "Tense",
-    "VerbForm",
-    "VerbHead",
-    "WhHead",
-    "deprel",
-    "pos",
-    "text",
-    "tokens",
-    "xpos"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_augmented_jj_rb_types_20250320/validation/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba7c69aecada9238046f52b3bbffcec94c5ab2a7a23106803453578507a36d1c
-size 5724752

dataset/ud_augmented_jj_rb_types_20250320/validation/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "MdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjGrad": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjPos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_augmented_jj_rb_types_20250320/validation/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "25f7c9b3bbb3e13e",
-  "_format_columns": [
-    "AdjGrad",
-    "AdjHead",
-    "AdjPos",
-    "AdjType",
-    "AdvHead",
-    "AdvType",
-    "Case",
-    "CdHead",
-    "ConjHead",
-    "Definite",
-    "Degree",
-    "DetHead",
-    "Gender",
-    "InHead",
-    "MdHead",
-    "Mood",
-    "NounHead",
-    "NumType",
-    "Number",
-    "Person",
-    "PronHead",
-    "PronType",
-    "Tense",
-    "VerbForm",
-    "VerbHead",
-    "WhHead",
-    "deprel",
-    "pos",
-    "text",
-    "tokens",
-    "xpos"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_transform_only_20250317/dataset_dict.json DELETED Viewed

	@@ -1 +0,0 @@
1	- {"splits": ["test", "train", "validation"]}

dataset/ud_transform_only_20250317/test/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7de427f099e36be3e7ab659b1d6522afa30137f6044a1254ec9c36eb3828929a
-size 9862544

dataset/ud_transform_only_20250317/test/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ModalHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ToHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_transform_only_20250317/test/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "12f8503a2d9f38d0",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "pos",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "AdjHead",
-    "AdvHead",
-    "CdHead",
-    "ConjHead",
-    "DetHead",
-    "InHead",
-    "ModalHead",
-    "NounHead",
-    "PronounHead",
-    "ToHead",
-    "VerbHead",
-    "WhHead"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_transform_only_20250317/train/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fc5931b824b13f2ac2bb76da737c46256137ef66365e90672f89f45a7b2b15bb
-size 40720208

dataset/ud_transform_only_20250317/train/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ModalHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ToHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_transform_only_20250317/train/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "fe61d5df50a17d24",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "pos",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "AdjHead",
-    "AdvHead",
-    "CdHead",
-    "ConjHead",
-    "DetHead",
-    "InHead",
-    "ModalHead",
-    "NounHead",
-    "PronounHead",
-    "ToHead",
-    "VerbHead",
-    "WhHead"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset/ud_transform_only_20250317/validation/data-00000-of-00001.arrow DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:82163cd7f5263d89a61d25886b3c5b71bfdc5fae4f778d5e545e5cfe914eb6bc
-size 5980624

dataset/ud_transform_only_20250317/validation/dataset_info.json DELETED Viewed

@@ -1,222 +0,0 @@
-{
-  "citation": "\\\n@misc{11234/1-3424,\ntitle = {Universal Dependencies 2.7},\nauthor = {Zeman, Daniel and Nivre, Joakim and Abrams, Mitchell and Ackermann, Elia and Aepli, No{\\\"e}mi and Aghaei, Hamid and Agi{\\'c}, {\\v Z}eljko and Ahmadi, Amir and Ahrenberg, Lars and Ajede, Chika Kennedy and Aleksandravi{\\v c}i{\\=u}t{\\.e}, Gabriel{\\.e} and Alfina, Ika and Antonsen, Lene and Aplonova, Katya and Aquino, Angelina and Aragon, Carolina and Aranzabe, Maria Jesus and Arnard{\\'o}ttir, {\\t H}{\\'o}runn and Arutie, Gashaw and Arwidarasti, Jessica Naraiswari and Asahara, Masayuki and Ateyah, Luma and Atmaca, Furkan and Attia, Mohammed and Atutxa, Aitziber and Augustinus, Liesbeth and Badmaeva, Elena and Balasubramani, Keerthana and Ballesteros, Miguel and Banerjee, Esha and Bank, Sebastian and Barbu Mititelu, Verginica and Basmov, Victoria and Batchelor, Colin and Bauer, John and Bedir, Seyyit Talha and Bengoetxea, Kepa and Berk, G{\\\"o}zde and Berzak, Yevgeni and Bhat, Irshad Ahmad and Bhat, Riyaz Ahmad and Biagetti, Erica and Bick, Eckhard and Bielinskien{\\.e}, Agn{\\.e} and Bjarnad{\\'o}ttir, Krist{\\'{\\i}}n and Blokland, Rogier and Bobicev, Victoria and Boizou, Lo{\\\"{\\i}}c and Borges V{\\\"o}lker, Emanuel and B{\\\"o}rstell, Carl and Bosco, Cristina and Bouma, Gosse and Bowman, Sam and Boyd, Adriane and Brokait{\\.e}, Kristina and Burchardt, Aljoscha and Candito, Marie and Caron, Bernard and Caron, Gauthier and Cavalcanti, Tatiana and Cebiroglu Eryigit, Gulsen and Cecchini, Flavio Massimiliano and Celano, Giuseppe G. A. and Ceplo, Slavomir and Cetin, Savas and Cetinoglu, Ozlem and Chalub, Fabricio and Chi, Ethan and Cho, Yongseok and Choi, Jinho and Chun, Jayeol and Cignarella, Alessandra T. 
and Cinkova, Silvie and Collomb, Aurelie and Coltekin, Cagr{\\i} and Connor, Miriam and Courtin, Marine and Davidson, Elizabeth and de Marneffe, Marie-Catherine and de Paiva, Valeria and Derin, Mehmet Oguz and de Souza, Elvis and Diaz de Ilarraza, Arantza and Dickerson, Carly and Dinakaramani, Arawinda and Dione, Bamba and Dirix, Peter and Dobrovoljc, Kaja and Dozat, Timothy and Droganova, Kira and Dwivedi, Puneet and Eckhoff, Hanne and Eli, Marhaba and Elkahky, Ali and Ephrem, Binyam and Erina, Olga and Erjavec, Tomaz and Etienne, Aline and Evelyn, Wograine and Facundes, Sidney and Farkas, Rich{\\'a}rd and Fernanda, Mar{\\'{\\i}}lia and Fernandez Alcalde, Hector and Foster, Jennifer and Freitas, Cl{\\'a}udia and Fujita, Kazunori and Gajdosov{\\'a}, Katar{\\'{\\i}}na and Galbraith, Daniel and Garcia, Marcos and G{\\\"a}rdenfors, Moa and Garza, Sebastian and Gerardi, Fabr{\\'{\\i}}cio Ferraz and Gerdes, Kim and Ginter, Filip and Goenaga, Iakes and Gojenola, Koldo and G{\\\"o}k{\\i}rmak, Memduh and Goldberg, Yoav and G{\\'o}mez Guinovart, Xavier and Gonz{\\'a}lez Saavedra,\nBerta and Grici{\\=u}t{\\.e}, Bernadeta and Grioni, Matias and Grobol, Lo{\\\"{\\i}}c and Gr{\\=u}z{\\={\\i}}tis, Normunds and Guillaume, Bruno and Guillot-Barbance, C{\\'e}line and G{\\\"u}ng{\\\"o}r, Tunga and Habash, Nizar and Hafsteinsson, Hinrik and Haji{\\v c}, Jan and Haji{\\v c} jr., Jan and H{\\\"a}m{\\\"a}l{\\\"a}inen, Mika and H{\\`a} M{\\~y}, Linh and Han, Na-Rae and Hanifmuti, Muhammad Yudistira and Hardwick, Sam and Harris, Kim and Haug, Dag and Heinecke, Johannes and Hellwig, Oliver and Hennig, Felix and Hladk{\\'a}, Barbora and Hlav{\\'a}{\\v c}ov{\\'a}, Jaroslava and Hociung, Florinel and Hohle, Petter and Huber, Eva and Hwang, Jena and Ikeda, Takumi and Ingason, Anton Karl and Ion, Radu and Irimia, Elena and Ishola, {\\d O}l{\\'a}j{\\'{\\i}}d{\\'e} and Jel{\\'{\\i}}nek, Tom{\\'a}{\\v s} and Johannsen, Anders and J{\\'o}nsd{\\'o}ttir, Hildur and J{\\o}rgensen, Fredrik and 
Juutinen, Markus and K, Sarveswaran and Ka{\\c s}{\\i}kara, H{\\\"u}ner and Kaasen, Andre and Kabaeva, Nadezhda and Kahane, Sylvain and Kanayama, Hiroshi and Kanerva, Jenna and Katz, Boris and Kayadelen, Tolga and Kenney, Jessica and Kettnerov{\\'a}, V{\\'a}clava and Kirchner, Jesse and Klementieva, Elena and K{\\\"o}hn, Arne and K{\\\"o}ksal, Abdullatif and Kopacewicz, Kamil and Korkiakangas, Timo and Kotsyba, Natalia and Kovalevskait{\\.e}, Jolanta and Krek, Simon and Krishnamurthy, Parameswari and Kwak, Sookyoung and Laippala, Veronika and Lam, Lucia and Lambertino, Lorenzo and Lando, Tatiana and Larasati, Septina Dian and Lavrentiev, Alexei and Lee, John and L{\\^e} H{\\`{\\^o}}ng, Ph\u01b0\u01a1ng and Lenci, Alessandro and Lertpradit, Saran and Leung, Herman and Levina, Maria and Li, Cheuk Ying and Li, Josie and Li, Keying and Li, Yuan and Lim, {KyungTae} and Linden, Krister and Ljubesic, Nikola and Loginova, Olga and Luthfi, Andry and Luukko, Mikko and Lyashevskaya, Olga and Lynn, Teresa and Macketanz, Vivien and Makazhanov, Aibek and Mandl, Michael and Manning, Christopher and Manurung, Ruli and Maranduc, Catalina and Marcek, David and Marheinecke, Katrin and Mart{\\'{\\i}}nez Alonso, H{\\'e}ctor and Martins, Andr{\\'e} and Masek, Jan and Matsuda, Hiroshi and Matsumoto, Yuji and {McDonald}, Ryan and {McGuinness}, Sarah and Mendonca, Gustavo and Miekka, Niko and Mischenkova, Karina and Misirpashayeva, Margarita and Missil{\\\"a}, Anna and Mititelu, Catalin and Mitrofan, Maria and Miyao, Yusuke and Mojiri Foroushani, {AmirHossein} and Moloodi, Amirsaeid and Montemagni, Simonetta and More, Amir and Moreno Romero, Laura and Mori, Keiko Sophie and Mori, Shinsuke and Morioka, Tomohiko and Moro, Shigeki and Mortensen, Bjartur and Moskalevskyi, Bohdan and Muischnek, Kadri and Munro, Robert and Murawaki, Yugo and M{\\\"u}{\\\"u}risep, Kaili and Nainwani, Pinkey and Nakhl{\\'e}, Mariam and Navarro Hor{\\~n}iacek, Juan Ignacio and Nedoluzhko,\nAnna and Ne{\\v 
s}pore-B{\\=e}rzkalne, Gunta and Nguy{\\~{\\^e}}n Th{\\d i}, L\u01b0\u01a1ng and Nguy{\\~{\\^e}}n Th{\\d i} Minh, Huy{\\`{\\^e}}n and Nikaido, Yoshihiro and Nikolaev, Vitaly and Nitisaroj, Rattima and Nourian, Alireza and Nurmi, Hanna and Ojala, Stina and Ojha, Atul Kr. and Ol{\\'u}{\\`o}kun, Ad{\\'e}day{\\d o}\u0300 and Omura, Mai and Onwuegbuzia, Emeka and Osenova, Petya and {\\\"O}stling, Robert and {\\O}vrelid, Lilja and {\\\"O}zate{\\c s}, {\\c S}aziye Bet{\\\"u}l and {\\\"O}zg{\\\"u}r, Arzucan and {\\\"O}zt{\\\"u}rk Ba{\\c s}aran, Balk{\\i}z and Partanen, Niko and Pascual, Elena and Passarotti, Marco and Patejuk, Agnieszka and Paulino-Passos, Guilherme and Peljak-{\\L}api{\\'n}ska, Angelika and Peng, Siyao and Perez, Cenel-Augusto and Perkova, Natalia and Perrier, Guy and Petrov, Slav and Petrova, Daria and Phelan, Jason and Piitulainen, Jussi and Pirinen, Tommi A and Pitler, Emily and Plank, Barbara and Poibeau, Thierry and Ponomareva, Larisa and Popel, Martin and Pretkalnina, Lauma and Pr{\\'e}vost, Sophie and Prokopidis, Prokopis and Przepi{\\'o}rkowski, Adam and Puolakainen, Tiina and Pyysalo, Sampo and Qi, Peng and R{\\\"a}{\\\"a}bis, Andriela and Rademaker, Alexandre and Rama, Taraka and Ramasamy, Loganathan and Ramisch, Carlos and Rashel, Fam and Rasooli, Mohammad Sadegh and Ravishankar, Vinit and Real, Livy and Rebeja, Petru and Reddy, Siva and Rehm, Georg and Riabov, Ivan and Rie{\\ss}ler, Michael and Rimkut{\\.e}, Erika and Rinaldi, Larissa and Rituma, Laura and Rocha, Luisa and R{\\\"o}gnvaldsson, Eir{\\'{\\i}}kur and Romanenko, Mykhailo and Rosa, Rudolf and Ro\u0219ca, Valentin and Rovati, Davide and Rudina, Olga and Rueter, Jack and R{\\'u}narsson, Kristjan and Sadde, Shoval and Safari, Pegah and Sagot, Benoit and Sahala, Aleksi and Saleh, Shadi and Salomoni, Alessio and Samardzi{\\'c}, Tanja and Samson, Stephanie and Sanguinetti, Manuela and S{\\\"a}rg,\nDage and Saul{\\={\\i}}te, Baiba and Sawanakunanon, Yanin and Scannell, Kevin and Scarlata, 
Salvatore and Schneider, Nathan and Schuster, Sebastian and Seddah, Djam{\\'e} and Seeker, Wolfgang and Seraji, Mojgan and Shen, Mo and Shimada, Atsuko and Shirasu, Hiroyuki and Shohibussirri, Muh and Sichinava, Dmitry and Sigur\u00f0sson, Einar Freyr and Silveira, Aline and Silveira, Natalia and Simi, Maria and Simionescu, Radu and Simk{\\'o}, Katalin and {\\v S}imkov{\\'a}, M{\\'a}ria and Simov, Kiril and Skachedubova, Maria and Smith, Aaron and Soares-Bastos, Isabela and Spadine, Carolyn and Steingr{\\'{\\i}}msson, Stein{\\t h}{\\'o}r and Stella, Antonio and Straka, Milan and Strickland, Emmett and Strnadov{\\'a}, Jana and Suhr, Alane and Sulestio, Yogi Lesmana and Sulubacak, Umut and Suzuki, Shingo and Sz{\\'a}nt{\\'o}, Zsolt and Taji, Dima and Takahashi, Yuta and Tamburini, Fabio and Tan, Mary Ann C. and Tanaka, Takaaki and Tella, Samson and Tellier, Isabelle and Thomas, Guillaume and Torga, Liisi and Toska, Marsida and Trosterud, Trond and Trukhina, Anna and Tsarfaty, Reut and T{\\\"u}rk, Utku and Tyers, Francis and Uematsu, Sumire and Untilov, Roman and Uresov{\\'a}, Zdenka and Uria, Larraitz and Uszkoreit, Hans and Utka, Andrius and Vajjala, Sowmya and van Niekerk, Daniel and van Noord, Gertjan and Varga, Viktor and Villemonte de la Clergerie, Eric and Vincze, Veronika and Wakasa, Aya and Wallenberg, Joel C. and Wallin, Lars and Walsh, Abigail and Wang, Jing Xian and Washington, Jonathan North and Wendt, Maximilan and Widmer, Paul and Williams, Seyi and Wir{\\'e}n, Mats and Wittern, Christian and Woldemariam, Tsegay and Wong, Tak-sum and Wr{\\'o}blewska, Alina and Yako, Mary and Yamashita, Kayo and Yamazaki, Naoki and Yan, Chunxiao and Yasuoka, Koichi and Yavrumyan, Marat M. 
and Yu, Zhuoran and Zabokrtsk{\\'y}, Zdenek and Zahra, Shorouq and Zeldes, Amir and Zhu, Hanzhi and Zhuravleva, Anna},\nurl = {http://hdl.handle.net/11234/1-3424},\nnote = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({{\\'U}FAL}), Faculty of Mathematics and Physics, Charles University},\ncopyright = {Licence Universal Dependencies v2.7},\nyear = {2020} }",
-  "description": "Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008).",
-  "features": {
-    "text": {
-      "dtype": "string",
-      "_type": "Value"
-    },
-    "tokens": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "xpos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "deprel": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "pos": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Case": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Definite": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Degree": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Gender": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Mood": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NumType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Number": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Person": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Poss": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronType": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Reflex": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Tense": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "Typo": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbForm": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "AdvHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "CdHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ConjHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "DetHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "InHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ModalHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "NounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "PronounHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "ToHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "VerbHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    },
-    "WhHead": {
-      "feature": {
-        "dtype": "string",
-        "_type": "Value"
-      },
-      "_type": "Sequence"
-    }
-  },
-  "homepage": "https://universaldependencies.org/",
-  "license": ""
-}

dataset/ud_transform_only_20250317/validation/state.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "_data_files": [
-    {
-      "filename": "data-00000-of-00001.arrow"
-    }
-  ],
-  "_fingerprint": "59aa93667916292e",
-  "_format_columns": [
-    "text",
-    "tokens",
-    "xpos",
-    "deprel",
-    "pos",
-    "Case",
-    "Definite",
-    "Degree",
-    "Gender",
-    "Mood",
-    "NumType",
-    "Number",
-    "Person",
-    "Poss",
-    "PronType",
-    "Reflex",
-    "Tense",
-    "Typo",
-    "VerbForm",
-    "AdjHead",
-    "AdvHead",
-    "CdHead",
-    "ConjHead",
-    "DetHead",
-    "InHead",
-    "ModalHead",
-    "NounHead",
-    "PronounHead",
-    "ToHead",
-    "VerbHead",
-    "WhHead"
-  ],
-  "_format_kwargs": {},
-  "_format_type": null,
-  "_output_all_columns": false,
-  "_split": null
-}

dataset_splitter.py DELETED Viewed

@@ -1,43 +0,0 @@
-from datasets import DatasetDict, load_from_disk
-import argparse
-from openai_dataset_maker import features
-def has_all_valid_labels(exp):
-    for col, labels in exp.items():
-        if col in {"text", "tokens"}:
-            continue
-        for label in labels:
-            if label not in features[col]:
-                return False
-    return True
-def is_evenly_shaped(exp):
-    cnt_set = set()
-    for col, labels in exp.items():
-        if col == "text":
-            continue
-        cnt_set.add(len(labels))
-    return len(cnt_set) == 1
-if __name__ == '__main__':
-    arg_parser = argparse.ArgumentParser(description="Train multi-task model.")
-    arg_parser.add_argument("data_path", help="Load dataset from specified path.",
-                            action="store")
-    arg_parser.add_argument("--save-path", help="Save final dataset to specified path.",
-                            action="store", default="./training_data")
-    args = arg_parser.parse_args()
-    loaded_dataset = load_from_disk(args.data_path)
-    loaded_dataset = loaded_dataset.filter(is_evenly_shaped)
-    loaded_dataset = loaded_dataset.filter(has_all_valid_labels)
-    first_split = loaded_dataset.train_test_split(shuffle=True, seed=42, test_size=0.09)
-    second_split = first_split["train"].train_test_split(test_size=0.1)
-    new_ds = DatasetDict()
-    new_ds["test"] = first_split["test"]
-    new_ds["train"] = second_split["train"]
-    new_ds["validation"] = second_split["test"]
-    new_ds.save_to_disk(args.save_path)

goemotions_predict.py DELETED Viewed

@@ -1,63 +0,0 @@
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-import numpy as np
-import torch
-from utils import get_torch_device
class GoEmotionsPredictor:
    """Multi-label text classifier wrapper around a sequence-classification checkpoint.

    Label names and decision thresholds are read from the loaded model's
    config attributes ``label_names``, ``per_label_thresholds`` and
    ``best_global_threshold`` (the last defaults to 0.65 when absent).
    """

    def __init__(self, model_name_or_path: str, subfolder=None):
        """Load tokenizer and model, then move the model to the selected device.

        Args:
            model_name_or_path: Checkpoint identifier passed to ``from_pretrained``.
            subfolder: Optional subfolder forwarded to ``from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path, subfolder=subfolder)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name_or_path, subfolder=subfolder)

        config = self.model.config
        self.label_names = getattr(config, "label_names", None)
        self.per_label_thresh = getattr(config, "per_label_thresholds", None)
        self.global_thresh = getattr(config, "best_global_threshold", 0.65)

        self.device = get_torch_device()
        self.model.to(self.device)
        self.model.eval()

    def predict(self, texts, use_per_label=True):
        """Classify raw texts and return the labels that clear the threshold.

        Args:
          texts (list[str] | str): Raw text strings to classify. A single
            string is treated as a one-element batch.
          use_per_label (bool): If True, apply per-label thresholds when the
            config provided them; otherwise (or if they are missing) apply the
            global threshold.

        Returns:
          A list of dicts, one per input text, each of the form
          {"text": ..., "emotions": [...]}. Empty input yields an empty list.
        """
        # A bare string would otherwise be enumerated character by character.
        texts = [texts] if isinstance(texts, str) else list(texts)
        if not texts:
            return []

        encodings = self.tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        )
        # Move encodings to same device as the model
        encodings = {k: v.to(self.device) for k, v in encodings.items()}

        # 1) Run the model and squash logits into independent probabilities
        with torch.no_grad():
            logits = self.model(**encodings).logits
            probs = torch.sigmoid(logits).cpu().numpy()

        # 2) Pick the threshold. Per-label thresholds are optional in the
        #    config, so fall back to the global one instead of comparing
        #    against np.array(None), which raises TypeError.
        if use_per_label and self.per_label_thresh is not None:
            thresholds = np.asarray(self.per_label_thresh)
        else:
            thresholds = self.global_thresh
        preds = probs >= thresholds

        # 3) Convert the boolean mask of each row to label names
        return [
            {"text": text,
             "emotions": [self.label_names[j] for j in np.flatnonzero(row)]}
            for text, row in zip(texts, preds)
        ]

llama_dataset_maker.py DELETED Viewed

@@ -1,194 +0,0 @@
-from abc import ABC, abstractmethod
-from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, Pipeline, pipeline
-import logging
-import torch
-from utils import get_torch_device
-logger = logging.getLogger(__name__)
class ChatModel(ABC):
    """Interface for chat backends that answer a list of role/content messages."""

    @abstractmethod
    def generate(self, messages: list[dict[str, str]]) -> dict[str, str]:
        """Produce the reply message for the given conversation.

        Subclasses must override this; the base implementation does nothing
        and returns None.
        """
class AdjLabeler:
    """Labels each token of an example as adjective / not adjective via a chat model."""

    def __init__(self, model: "ChatModel"):
        """Store the chat backend used to answer the per-token questions."""
        self.model = model

    def label_example(self, exp, feature_name):
        """Ask the chat model, token by token, whether each token is an adjective.

        A fixed few-shot conversation about adjectives is followed by one
        yes/no question per token. Each question and its reply are added to
        the running conversation, so later tokens are asked with the earlier
        answers as context.

        Args:
            exp: Example mapping; only ``exp["tokens"]`` is read.
            feature_name: Currently unused; kept for interface compatibility.

        Returns:
            A list with one entry per token: "yes" when the reply starts with
            "yes" (case-insensitive), otherwise "no".
        """
        messages = [
            {"role": "system",
             "content": "You are a helpful Grammar tutor."},
            {"role": "user",
             "content": "An adjective is a word that describes a noun?"},
            {"role": "assistant",
             "content": "Yes, that's correct! An adjective relates to, modifies, or describes nouns."},
            {"role": "user",
             "content": "Are they always used with nouns?"},
            {"role": "assistant",
             "content": ("No, adjectives often appear directly before nouns (e.g. \"a red apple\") "
                         "but they can also follow linking verbs to describe the subject (e.g. \"The sky is blue\"). "
                         "Sometimes, adjectives are used as complements in certain constructions or phrases "
                         "(e.g. \"the rich\" or \"well-known author\").")},
            {"role": "user",
             "content": "They can have comparative or superlative forms too, right?"},
            {"role": "assistant",
             "content": ("Yes, that's right! The word \"fast\" can take a comparative form as in \"faster\" "
                         "or a superlative form as in \"fastest\". Some adjectives don't have comparative or "
                         "superlative forms but use the word \"more\" or \"most\" to become comparative or "
                         "superlative.")},
            {"role": "user",
             "content": f"How about this example: {exp['tokens']}"},
        ]
        token_labels = []
        for idx, token in enumerate(exp["tokens"]):
            question = {"role": "user",
                        "content": f"Is '{token}' at position {idx} an adjective? Answer 'yes' or 'no'."}
            assistant_message = self.model.generate(messages + [question])
            logger.info(f"{assistant_message} - {token}")

            answer = str(assistant_message.get("content", "")).strip().lower()
            if answer.startswith("yes"):
                token_labels.append("yes")
            else:
                if not answer.startswith("no"):
                    # Surface replies that are neither yes nor no instead of
                    # silently treating them as a confident "no".
                    logger.warning("Unexpected reply %r for token %r; recording 'no'",
                                   answer, token)
                token_labels.append("no")

            # Extend the history with just this exchange. Re-appending the whole
            # conversation (as before) made the prompt roughly double per token.
            messages.extend((question, assistant_message))
        return token_labels
class LlamaPipeline(ChatModel):
    """Chat backend built on a Hugging Face ``text-generation`` pipeline."""

    def __init__(self, model_name: str):
        """Load the tokenizer and build the generation pipeline for *model_name*."""
        self.device = get_torch_device()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.pipeline = pipeline(
            task="text-generation",
            model=model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )

    def generate(self, messages, max_new_tokens=1):
        """Run the pipeline on a conversation and return its final message.

        Args:
            messages: Conversation passed straight to the pipeline.
            max_new_tokens: Upper bound on newly generated tokens.

        Returns:
            The last element of the first output's "generated_text"
            (presumably the assistant reply message; confirm against the
            pipeline's chat output format).
        """
        sampling_options = {
            "max_new_tokens": max_new_tokens,
            # The end-of-sequence id doubles as the padding id.
            "pad_token_id": self.tokenizer.eos_token_id,
            "temperature": 0.6,
            "top_p": 0.9,
        }
        results = self.pipeline(messages, **sampling_options)
        first_result = results[0]
        return first_result["generated_text"][-1]
class LlamaModel(ChatModel):
    """
    A wrapper around a Llama model checkpoint using Hugging Face Transformers.

    ``generate`` accepts either a plain prompt string (returns the completion
    string, as before) or a list of chat messages (returns an assistant
    message dict, as the ``ChatModel`` interface requires).
    """

    def __init__(self, model_name: str):
        """Load tokenizer and model for *model_name* and set sampling defaults."""
        torch_device = get_torch_device()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=str(torch_device),
            torch_dtype=torch.float16,
        )
        self.model.to(torch_device)
        self.model.eval()
        # Adjust generation parameters as needed
        self.generation_config = GenerationConfig(
            max_new_tokens=1,
            pad_token_id=self.tokenizer.eos_token_id,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )

    def generate(self, prompt: "str | list[dict[str, str]]") -> "str | dict[str, str]":
        """
        Generate a completion for a prompt string or a chat conversation.

        Args:
            prompt: Either raw prompt text, or a list of {"role", "content"}
                messages that is rendered with the tokenizer's chat template.

        Returns:
            For a string prompt, the generated continuation as a string.
            For a message list, {"role": "assistant", "content": <continuation>}.
        """
        is_chat = not isinstance(prompt, str)
        if is_chat:
            inputs = self.tokenizer.apply_chat_template(
                prompt,
                add_generation_prompt=True,
                return_dict=True,
                return_tensors="pt",
            ).to(self.model.device)
        else:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            output_ids = self.model.generate(
                **inputs,
                generation_config=self.generation_config
            )

        # Strip the prompt by token count. Slicing the decoded text by
        # len(prompt) is wrong whenever decoding does not reproduce the
        # prompt character-for-character.
        prompt_token_count = inputs["input_ids"].shape[1]
        completion = self.tokenizer.decode(
            output_ids[0][prompt_token_count:], skip_special_tokens=True)

        if is_chat:
            return {"role": "assistant", "content": completion}
        return completion
-# ----------------------------------
-# Putting It All Together
-# ----------------------------------
if __name__ == "__main__":
    # Manual smoke run: label a few hand-picked sentences and log the result.
    import logging.config

    from utils import default_logging_config

    logging.config.dictConfig(default_logging_config)

    chat_backend = LlamaPipeline(
        model_name="meta-llama/Llama-3.2-3B-Instruct",
        #model_name="meta-llama/Llama-3.1-8B-Instruct",
    )
    labeler = AdjLabeler(chat_backend)

    # Sample corpus; entries are toggled by hand via commenting.
    sample_sentences = [
        #{"text": "Joan has a nice dog.",
        # "tokens": ["Joan", "has", "a", "nice", "dog."]},
        #{"text": "Bob is the most agile person I have ever met.",
        # "tokens": ["Bob", "is", "the", "most", "agile", "person", "I", "have", "ever", "met."]},
        #{"text": "He's a total shit head",
        # "tokens": ["He's", "a", "total", "shit", "head"]},
        #{"text": "The old, creaky house stood on the quiet street.",
        # "tokens": ["The", "old,", "creaky", "house", "stood", "on", "the", "quiet", "street."]},
        #{"text": "The sky turned brilliant blue as the sun emerged.",
        # "tokens": ["The", "sky", "turned", "brilliant", "blue", "as", "the", "sun", "emerged."]},
        #{"text": "They admired the well-behaved and enthusiastic children at the party.",
        # "tokens": ["They", "admired", "the", "well-behaved", "and", "enthusiastic", "children", "at", "the",
        #            "party."]},
        #{"text": "After dinner, she felt tired and content.",
        # "tokens": ["After", "dinner,", "she", "felt", "tired", "and", "content."]},
        #{"text": "The resourceful team devised a clever plan.",
        # "tokens": ["The", "resourceful", "team", "devised", "a", "clever", "plan."]},
        #{"text": "He handed over the thick book to the eager student.",
        # "tokens": ["He", "handed", "over", "the", "thick", "book", "to", "the", "eager", "student."]},
        #{"text": "We appreciated the delicious, handmade pie from our neighbor.",
        # "tokens": ["We", "appreciated", "the", "delicious,", "handmade", "pie", "from", "our", "neighbor."]},
        #{"text": "In the enchanted forest, sparkling fairies danced under the moonlight.",
        # "tokens": ["In", "the", "enchanted", "forest,", "sparkling", "fairies", "danced", "under", "the", "moonlight."]},
        #{"text": "The stray cats, hungry and dirty, roamed the narrow alley.",
        # "tokens": ["The", "stray", "cats,", "hungry", "and", "dirty,", "roamed", "the", "narrow", "alley."]},
        #{"text": "The challenging puzzle left the determined young boy both frustrated and excited.",
        # "tokens": ["The", "challenging", "puzzle", "left", "the", "determined", "young", "boy", "both", "frustrated",
        #            "and", "excited."]},
        {"text": "Big cars use a lot more gas.",
         "tokens": ["Big", "cars", "use", "a", "lot", "more", "gas."]},
        {"text": "My car is faster than my bicycle.",
         "tokens": ["My", "car", "is", "faster", "than", "my", "bicycle."]},
        #{"text": "This puzzle is more challenging than the one we solved yesterday.",
        # "tokens": ["This", "puzzle", "is", "more", "challenging", "than", "the", "one", "we", "solved", "yesterday."]},
        #{"text": "Among all the students, Lara is the most diligent.",
        # "tokens": ["Among", "all", "the", "students,", "Lara", "is", "the", "most", "diligent."]},
        #{"text": "That building is taller than the one next to it.",
        # "tokens": ["That", "building", "is", "taller", "than", "the", "one", "next", "to", "it."]},
        #{"text": "This book is more interesting than the movie adaptation.",
        # "tokens": ["This", "book", "is", "more", "interesting", "than", "the", "movie", "adaptation."]},
        #{"text": "Of all the fruits, mangoes are the sweetest.",
        # "tokens": ["Of", "all", "the", "fruits,", "mangoes", "are", "the", "sweetest."]},
        #{"text": "His running speed is quicker than anyone else's on the team.",
        # "tokens": ["His", "running", "speed", "is", "quicker", "than", "anyone", "else's", "on", "the", "team."]},
        #{"text": "The exam was easier than I had anticipated.",
        # "tokens": ["The", "exam", "was", "easier", "than", "I", "had", "anticipated."]},
        #{"text": "Among all the flavors, vanilla is the mildest.",
        # "tokens": ["Among", "all", "the", "flavors,", "vanilla", "is", "the", "mildest."]},
        #{"text": "The new smartphone is lighter than the previous version.",
        # "tokens": ["The", "new", "smartphone", "is", "lighter", "than", "the", "previous", "version."]},
    ]

    for sentence in sample_sentences:
        predicted = labeler.label_example(sentence, "adj")
        logger.info(f"\ntokens:\t{sentence['tokens']}\nadj:\t{predicted}")

models/conll2012_en12_20250305/added_tokens.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "[MASK]": 128000
-}

models/conll2012_en12_20250305/config.json DELETED Viewed

@@ -1,135 +0,0 @@
-{
-  "_name_or_path": "microsoft/deberta-v3-base",
-  "architectures": [
-    "MultiHeadModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label_maps": {
-    "ner_tags": [
-      "B-QUANTITY",
-      "I-PERSON",
-      "B-LANGUAGE",
-      "O",
-      "I-LOC",
-      "I-MONEY",
-      "I-DATE",
-      "I-CARDINAL",
-      "I-WORK_OF_ART",
-      "I-FAC",
-      "B-FAC",
-      "B-LOC",
-      "I-PERCENT",
-      "B-CARDINAL",
-      "B-NORP",
-      "B-TIME",
-      "B-GPE",
-      "I-LANGUAGE",
-      "B-PERSON",
-      "B-LAW",
-      "I-LAW",
-      "B-MONEY",
-      "I-ORDINAL",
-      "B-PRODUCT",
-      "B-WORK_OF_ART",
-      "B-ORDINAL",
-      "B-DATE",
-      "B-ORG",
-      "I-GPE",
-      "I-PRODUCT",
-      "B-EVENT",
-      "B-PERCENT",
-      "I-EVENT",
-      "I-ORG",
-      "I-NORP",
-      "I-QUANTITY",
-      "I-TIME"
-    ],
-    "pos_tags": [
-      "RBR",
-      "POS",
-      "EX",
-      "VBP",
-      "VBZ",
-      "``",
-      "PRP$",
-      "WP",
-      "VBD",
-      "NN",
-      "NNS",
-      "WP$",
-      "RB",
-      "UH",
-      ":",
-      "NNPS",
-      "LS",
-      "HYPH",
-      "RP",
-      "WDT",
-      "-LRB-",
-      ",",
-      "CC",
-      "JJS",
-      "MD",
-      "JJR",
-      "RBS",
-      "SYM",
-      "DT",
-      "-RRB-",
-      "FW",
-      "TO",
-      "PDT",
-      "NNP",
-      "ADD",
-      "VB",
-      "$",
-      "VBG",
-      "CD",
-      "''",
-      "WRB",
-      "PRP",
-      "NFP",
-      "JJ",
-      "VBN",
-      ".",
-      "IN"
-    ],
-    "verb_predicate": [
-      "O",
-      "Yes"
-    ]
-  },
-  "layer_norm_eps": 1e-07,
-  "legacy": true,
-  "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "num_labels_dict": {
-    "ner_tags": 37,
-    "pos_tags": 47,
-    "verb_predicate": 2
-  },
-  "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.49.0",
-  "type_vocab_size": 0,
-  "vocab_size": 128100
-}

models/conll2012_en12_20250305/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4eefbb306d8c9dd94dd544cc95aaaf31d9f8b768ef9bb47ed54ecac6c8982e68
-size 735615520

models/conll2012_en12_20250305/special_tokens_map.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "bos_token": "[CLS]",
-  "cls_token": "[CLS]",
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}

models/conll2012_en12_20250305/spm.model DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
-size 2464616

models/conll2012_en12_20250305/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

models/conll2012_en12_20250305/tokenizer_config.json DELETED Viewed

@@ -1,60 +0,0 @@
-{
-  "add_prefix_space": true,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "128000": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "bos_token": "[CLS]",
-  "clean_up_tokenization_spaces": false,
-  "cls_token": "[CLS]",
-  "do_lower_case": false,
-  "eos_token": "[SEP]",
-  "extra_special_tokens": {},
-  "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
-  "split_by_punct": false,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
-}

models/conll2012_en12_20250305/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:778a691f83fb5dcfe8e9c9a6371dc7f31bf3842f4d67464cc949409d466b4db0
-size 5240

models/ud_augmented_jj_rb_types_20250320_v2/README.md DELETED Viewed

The diff for this file is too large to render. See raw diff

models/ud_augmented_jj_rb_types_20250320_v2/added_tokens.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "[MASK]": 128000
-}

models/ud_augmented_jj_rb_types_20250320_v2/config.json DELETED Viewed

@@ -1,388 +0,0 @@
-{
-  "_name_or_path": "models/ud_augmented_jj_rb_types_20250320_v2",
-  "architectures": [
-    "MultiHeadModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label_maps": {
-    "AdjGrad": [
-      "yes",
-      "no",
-      "O"
-    ],
-    "AdjHead": [
-      "2",
-      "4+",
-      "-1",
-      "O",
-      "-4+",
-      "3",
-      "1",
-      "-2",
-      "-3"
-    ],
-    "AdjPos": [
-      "postpositive",
-      "attributive",
-      "predicative",
-      "O"
-    ],
-    "AdjType": [
-      "material",
-      "quantifying",
-      "color",
-      "size",
-      "O",
-      "quality",
-      "purpose",
-      "origin",
-      "relational",
-      "age",
-      "shape",
-      "limiting"
-    ],
-    "AdvHead": [
-      "-3+",
-      "2",
-      "4+",
-      "-1",
-      "O",
-      "3",
-      "1",
-      "-2"
-    ],
-    "AdvType": [
-      "manner",
-      "time",
-      "negation",
-      "conjunctive",
-      "frequency",
-      "focusing",
-      "O",
-      "place",
-      "degree",
-      "disjunct",
-      "modal"
-    ],
-    "Case": [
-      "Acc",
-      "Nom",
-      "O"
-    ],
-    "CdHead": [
-      "-3+",
-      "2",
-      "-1",
-      "3+",
-      "O",
-      "1",
-      "-2"
-    ],
-    "ConjHead": [
-      "2",
-      "4+",
-      "O",
-      "3",
-      "1",
-      "-1+"
-    ],
-    "Definite": [
-      "Def",
-      "Ind",
-      "O"
-    ],
-    "Degree": [
-      "Cmp",
-      "Pos",
-      "Sup",
-      "O"
-    ],
-    "DetHead": [
-      "-2+",
-      "2",
-      "4+",
-      "-1",
-      "O",
-      "3",
-      "1"
-    ],
-    "Gender": [
-      "Fem",
-      "Masc",
-      "Neut",
-      "O"
-    ],
-    "InHead": [
-      "4",
-      "2",
-      "-2+",
-      "-1",
-      "O",
-      "3",
-      "5+",
-      "1"
-    ],
-    "MdHead": [
-      "2",
-      "3+",
-      "O",
-      "1",
-      "-1+"
-    ],
-    "Mood": [
-      "Imp",
-      "Ind",
-      "O"
-    ],
-    "NounHead": [
-      "2",
-      "4+",
-      "-5+",
-      "-1",
-      "O",
-      "3",
-      "-4",
-      "1",
-      "-2",
-      "-3"
-    ],
-    "NumType": [
-      "Mult",
-      "Card",
-      "Ord",
-      "O"
-    ],
-    "Number": [
-      "Plur",
-      "Sing",
-      "O"
-    ],
-    "Person": [
-      "3",
-      "1",
-      "2",
-      "O"
-    ],
-    "PronHead": [
-      "-2+",
-      "2",
-      "-1",
-      "3+",
-      "O",
-      "1"
-    ],
-    "PronType": [
-      "Dem",
-      "O",
-      "Rel",
-      "Int",
-      "Prs",
-      "Art"
-    ],
-    "Tense": [
-      "Past",
-      "Pres",
-      "O"
-    ],
-    "VerbForm": [
-      "Ger",
-      "Inf",
-      "O",
-      "Part",
-      "Fin"
-    ],
-    "VerbHead": [
-      "2",
-      "4+",
-      "-5+",
-      "-1",
-      "O",
-      "3",
-      "-4",
-      "1",
-      "-2",
-      "-3"
-    ],
-    "WhHead": [
-      "-2+",
-      "2",
-      "4+",
-      "-1",
-      "O",
-      "3",
-      "1"
-    ],
-    "deprel": [
-      "punct",
-      "compound",
-      "obl",
-      "case",
-      "obj",
-      "nsubj:pass",
-      "cc:preconj",
-      "list",
-      "mark",
-      "parataxis",
-      "acl",
-      "obl:npmod",
-      "root",
-      "nmod:poss",
-      "flat",
-      "iobj",
-      "nsubj",
-      "expl",
-      "compound:prt",
-      "cop",
-      "vocative",
-      "nmod",
-      "aux:pass",
-      "ccomp",
-      "det",
-      "csubj",
-      "obl:tmod",
-      "xcomp",
-      "aux",
-      "discourse",
-      "acl:relcl",
-      "cc",
-      "nmod:npmod",
-      "appos",
-      "advcl",
-      "conj",
-      "fixed",
-      "advmod",
-      "det:predet",
-      "amod",
-      "nmod:tmod",
-      "nummod"
-    ],
-    "pos": [
-      "INTJ",
-      "VERB",
-      "SYM",
-      "PROPN",
-      "ADV",
-      "AUX",
-      "SCONJ",
-      "ADJ",
-      "DET",
-      "NUM",
-      "PRON",
-      "NOUN",
-      "X",
-      "CCONJ",
-      "ADP",
-      "PUNCT",
-      "PART"
-    ],
-    "xpos": [
-      "FW",
-      "RBR",
-      "NNPS",
-      "DT",
-      "PDT",
-      "EX",
-      "HYPH",
-      "CD",
-      "ADD",
-      "SYM",
-      "PRP",
-      "JJR",
-      "MD",
-      "WDT",
-      "JJ",
-      "RB",
-      "RP",
-      "TO",
-      "NNP",
-      "NN",
-      "CC",
-      "-RRB-",
-      "VBP",
-      "WRB",
-      "''",
-      "IN",
-      ":",
-      "LS",
-      "-LRB-",
-      "VBD",
-      "VBN",
-      ".",
-      "VBZ",
-      "VBG",
-      "WP$",
-      "JJS",
-      "VB",
-      "NNS",
-      "``",
-      "POS",
-      "UH",
-      "PRP$",
-      "NFP",
-      "$",
-      "RBS",
-      ",",
-      "WP"
-    ]
-  },
-  "layer_norm_eps": 1e-07,
-  "legacy": true,
-  "max_position_embeddings": 512,
-  "max_relative_positions": -1,
-  "model_type": "deberta-v2",
-  "norm_rel_ebd": "layer_norm",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "num_labels_dict": {
-    "AdjGrad": 3,
-    "AdjHead": 9,
-    "AdjPos": 4,
-    "AdjType": 12,
-    "AdvHead": 8,
-    "AdvType": 11,
-    "Case": 3,
-    "CdHead": 7,
-    "ConjHead": 6,
-    "Definite": 3,
-    "Degree": 4,
-    "DetHead": 7,
-    "Gender": 4,
-    "InHead": 8,
-    "MdHead": 5,
-    "Mood": 3,
-    "NounHead": 10,
-    "NumType": 4,
-    "Number": 3,
-    "Person": 4,
-    "PronHead": 6,
-    "PronType": 6,
-    "Tense": 3,
-    "VerbForm": 5,
-    "VerbHead": 10,
-    "WhHead": 7,
-    "deprel": 42,
-    "pos": 17,
-    "xpos": 47
-  },
-  "pad_token_id": 0,
-  "pooler_dropout": 0,
-  "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
-  "pos_att_type": [
-    "p2c",
-    "c2p"
-  ],
-  "position_biased_input": false,
-  "position_buckets": 256,
-  "relative_attention": true,
-  "share_att_key": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.49.0",
-  "type_vocab_size": 0,
-  "vocab_size": 128100
-}

models/ud_augmented_jj_rb_types_20250320_v2/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:57b5dda10a3ea434194a6896dce6e4e1e1290a8bb3db6d753b78b5b7f2b510b7
-size 804672988

models/ud_augmented_jj_rb_types_20250320_v2/special_tokens_map.json DELETED Viewed

@@ -1,51 +0,0 @@
-{
-  "bos_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}