from module import config, transformers_utility as tr, utils, metrics, dataio
from prettytable import PrettyTable
import numpy as np

# Output table: one column per tissue (order taken from the project config),
# filled with one row of predictions per input sequence in main().
table = PrettyTable()
table.field_names = config.tissues

# Project paths; config.models / config.data support the "/" operator,
# so they are presumably pathlib.Path objects — TODO confirm in module.config.
TOKENIZER_DIR = config.models / "byte-level-bpe-tokenizer"
PRETRAINED_MODEL = config.models / "transformer" / "prediction-model" / "saved_model.pth"
DATA_DIR = config.data
def load_model(args, settings):
    """Load the tokenizer and prediction model via the shared utility.

    Thin wrapper over ``tr.load_model``: forwards the model name and
    tokenizer directory positionally, the pretrained checkpoint path and
    log offset as explicit keywords, and the remaining model settings as
    extra keyword arguments. Returns whatever ``tr.load_model`` returns
    (unpacked by the caller as config, tokenizer, model).
    """
    model_name = args.model_name
    tokenizer_dir = args.tokenizer_dir
    return tr.load_model(
        model_name,
        tokenizer_dir,
        pretrained_model=args.pretrained_model,
        log_offset=args.log_offset,
        **settings,
    )
def main(TEST_DATA):
    """Run the tissue-prediction model over ``TEST_DATA`` and print results.

    The same file path is supplied as both the train and test split so the
    project's standard data-loading path can be reused unchanged. Model
    outputs are mapped out of log space with ``exp(x) - 1`` before being
    rendered as one table row per input sequence.
    """
    cli = utils.get_args(
        data_dir=DATA_DIR,
        train_data=TEST_DATA,
        test_data=TEST_DATA,
        pretrained_model=PRETRAINED_MODEL,
        tokenizer_dir=TOKENIZER_DIR,
        model_name="roberta-pred-mean-pool",
    )

    # Base settings from config, with CLI overrides layered on top.
    settings = utils.get_model_settings(config.settings, cli)
    if cli.output_mode:
        settings["output_mode"] = cli.output_mode
    if cli.tissue_subset is not None:
        settings["num_labels"] = len(cli.tissue_subset)

    print("Loading model...")
    _, tokenizer, model = load_model(cli, settings)

    print("Loading data...")
    splits = dataio.load_datasets(
        tokenizer,
        cli.train_data,
        eval_data=cli.eval_data,
        test_data=cli.test_data,
        seq_key="text",
        file_type="text",
        filter_empty=cli.filter_empty,
        shuffle=False,
    )
    # NOTE(review): the "train" split is read deliberately — TEST_DATA was
    # passed as train_data above, so this *is* the test file's dataset.
    eval_split = splits["train"]

    print("Getting predictions:")
    raw = metrics.get_predictions(model, eval_split)
    # Invert the log-space transform; presumably pairs with log_offset=1 at
    # training time — TODO confirm against the training pipeline.
    predictions = np.exp(np.array(raw)) - 1
    for row in predictions:
        table.add_row(row)
    print(table)
| if __name__ == "__main__": | |
| main("test.txt") |