# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0
"""Implements a Hugging Causal LM wrapped inside a :class:`.ComposerModel`."""
from typing import Mapping, Union
from composer.metrics.nlp import (
InContextLearningLMAccuracy,
InContextLearningLMExpectedCalibrationError,
InContextLearningMCExpectedCalibrationError,
InContextLearningMultipleChoiceAccuracy,
InContextLearningQAAccuracy,
InContextLearningCodeEvalAccuracy,
LanguageCrossEntropy,
LanguagePerplexity,
)
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from composer.models.huggingface import HuggingFaceModel
# Public API of this module.
# NOTE(review): ``ComposerOpenLMCausalLM`` is not defined in this chunk —
# presumably declared elsewhere in the file; verify before relying on it.
__all__ = ["ComposerOpenLMCausalLM", "SimpleComposerOpenLMCausalLM"]
# Either flavor of Hugging Face tokenizer (slow or fast) is accepted.
Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
# Metrics computed on the training split: plain LM loss and perplexity.
TRAIN_METRICS = [
    LanguageCrossEntropy(),
    LanguagePerplexity(),
]
# Metrics computed during evaluation: the training metrics plus the
# in-context-learning accuracy/calibration suite from Composer.
EVAL_METRICS = [
    LanguageCrossEntropy(),
    LanguagePerplexity(),
    InContextLearningLMAccuracy(),
    InContextLearningMultipleChoiceAccuracy(),
    InContextLearningQAAccuracy(),
    InContextLearningLMExpectedCalibrationError(),
    InContextLearningMCExpectedCalibrationError(),
    InContextLearningCodeEvalAccuracy(),
]
class SimpleComposerOpenLMCausalLM(HuggingFaceModel):
    """Thin :class:`HuggingFaceModel` wrapper around a causal LM.

    Wires the module-level ``TRAIN_METRICS`` / ``EVAL_METRICS`` into
    Composer and enables label shifting for next-token prediction.
    """

    def __init__(self, model, tokenizer):
        """Wrap *model* and *tokenizer* in a ComposerModel.

        Args:
            model: the Hugging Face causal LM to wrap.
            tokenizer: the matching tokenizer (slow or fast).
        """
        # shift_labels=True: labels are shifted by one position so the
        # model is scored on next-token prediction.
        super().__init__(
            model=model,
            tokenizer=tokenizer,
            metrics=TRAIN_METRICS,
            eval_metrics=EVAL_METRICS,
            shift_labels=True,
        )

    def generate(self, input_ids=None, inputs_embeds=None, **kwargs):
        """Delegate generation to the parent class.

        ``inputs_embeds`` is accepted for signature compatibility with the
        Hugging Face ``generate`` API but is intentionally NOT forwarded —
        only ``input_ids`` (plus any extra keyword args) reaches the parent.
        """
        del inputs_embeds  # accepted but unused; see docstring
        return super().generate(input_ids=input_ids, **kwargs)