Spaces:
Runtime error
Runtime error
| import json | |
| from pathlib import Path | |
| import spacy | |
| from spacy.training import Example | |
def make_training_doc(nlp: "spacy.language.Language", data: list) -> list:
    """
    Convert raw training data into spaCy Example objects that can be used for training.

    parameters:
        nlp: spaCy pipeline; its ``make_doc`` is called on every text
        data: iterable of ``(text, annotations)`` pairs; ``annotations`` is
            handed to ``Example.from_dict`` unchanged
    returns:
        training_data: list of ``Example`` objects, one per input pair, in input order
    """
    # make_doc only tokenizes the text; Example.from_dict then pairs that
    # doc with its annotations.
    return [
        Example.from_dict(nlp.make_doc(text), annotations)
        for text, annotations in data
    ]
def load_model(model: str = None):
    """
    Load the pipeline indicated by model, or create a blank English one.

    parameters:
        model: str or None, name/path passed to ``spacy.load``; when None a
            blank 'en' pipeline is created instead
    returns:
        nlp: spacy model object
        optimizer: the optimizer to be used in training
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print(f"Loaded model '{model}'")
        # An existing pipeline continues from its current state.
        optimizer = nlp.resume_training()
    else:
        nlp = spacy.blank('en')  # create blank Language class
        print("Created blank 'en' model")
        # initialize() is the spaCy v3 replacement for the deprecated
        # begin_training(); it returns the optimizer as well.
        optimizer = nlp.initialize()
    return nlp, optimizer
def save_model(model: "spacy.language.Language", output_dir: str):
    """
    Save the model to output_dir, creating the directory if needed.

    parameters:
        model: spacy model (any object exposing ``to_disk(path)``)
        output_dir: str, Path or None; target directory. When None nothing
            is written.
    returns:
        None
    """
    if output_dir is None:
        return None

    target = Path(output_dir)
    # parents=True allows nested output paths; exist_ok=True avoids the
    # check-then-create race of a separate exists() test.
    target.mkdir(parents=True, exist_ok=True)
    model.to_disk(target)
    print("Saved model to", target)
    return None
class _DataConfigError(ValueError, AssertionError):
    """
    Raised by load_data when the configuration cannot yield the required data.

    Also derives from AssertionError because these checks used to be
    ``assert`` statements; callers catching AssertionError keep working.
    """


def load_data(args):
    """
    Load training data, evaluation data as well as entities dictionary.

    parameters:
        args: dict, configuration from the config file. Keys read here:
            'train_dir', 'ent_key', 'ent_dir', 'eval_dir'. Despite the
            '_dir' suffix each value is opened as a JSON file path.
    returns:
        train_dict, entities_dict, eval_dict (eval_dict is None when
        args['eval_dir'] is None)
    raises:
        ValueError (also an AssertionError): when 'train_dir' is None, or
            when no entities are found in the training data and 'ent_dir'
            is None
    """
    train_path = args['train_dir']
    if train_path is None:
        raise _DataConfigError('indicate path for training directory')

    # Load the training data
    with open(train_path, encoding='utf-8') as f:
        train_dict = json.load(f)
    print('Loaded Training Data')

    # Entities may be embedded in the training file under args['ent_key'].
    try:
        entities_dict = train_dict[args['ent_key']]
    except KeyError:
        entities_dict = None
        print('No classes for entities found in data loaded. Proceed to check in ent_dir')
    else:
        print('Loaded Entities from Training Data')

    # Load entities from the separate file only when the training data had none
    if entities_dict is None:
        ent_path = args['ent_dir']
        if ent_path is None:
            raise _DataConfigError('No entities found from training_dir & ent_dir')
        with open(ent_path, encoding='utf-8') as f:
            entities_dict = json.load(f)[args['ent_key']]
        print('Loaded Entities from ent_dir')

    # Load eval data (optional)
    eval_path = args['eval_dir']
    if eval_path is None:
        return train_dict, entities_dict, None
    with open(eval_path, encoding='utf-8') as f:
        eval_dict = json.load(f)
    print('Loaded Evaluating Data')
    return train_dict, entities_dict, eval_dict