IvoHoese
/

LLM_Dataset_Inference

Safetensors

Model card Files Files and versions

xet

Community

IvoHoese committed on Mar 3

Commit

5ac3ae1

verified ·

1 Parent(s): 24aebf2

Upload utils.py

Browse files

Files changed (1) hide show

utils.py +146 -0

utils.py ADDED Viewed

	@@ -0,0 +1,146 @@

+from transformers import OlmoModel, OlmoPreTrainedModel, GenerationMixin, AutoConfig, AutoModelForSequenceClassification
+from transformers.modeling_outputs import SequenceClassifierOutputWithPast
+import torch
+from peft import PeftModel, PeftConfig
+from transformers import AutoConfig
+import logging
+from contextlib import contextmanager
+# Device that loaded models are moved to: CUDA when available, otherwise CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# The custom model for using Olmo with a sequence classification task
+class OlmoForSequenceClassification(OlmoPreTrainedModel, GenerationMixin):
+    """Olmo backbone with a linear classification head.
+
+    The head is applied to every position and the logits at the last sequence
+    position are returned as the sequence-level prediction, so inputs must be
+    left-padded (tokenizer.padding_side == 'left').
+    """
+    def __init__(self, config):
+        """Build the Olmo backbone and a Linear(hidden_size, num_labels) head from `config`."""
+        # Check OlmoForCausalLM.__init__ for the reference construction order.
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.model = OlmoModel(config)
+        self.classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
+        # Initialize weights and apply final processing
+        self.post_init()
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: torch.Tensor | None = None,
+        labels: torch.LongTensor | None = None,
+        **kwargs,
+    ) -> SequenceClassifierOutputWithPast:
+        """Run the backbone and classify each sequence from its last position.
+
+        Args:
+            input_ids: Token ids forwarded to the backbone.
+            attention_mask: Attention mask forwarded to the backbone.
+            labels: Optional targets; when given, a loss is computed via
+                `self.loss_function`.
+            **kwargs: Extra keyword arguments passed through to the backbone.
+
+        Returns:
+            A SequenceClassifierOutputWithPast whose `logits` are the
+            last-position logits and whose `loss` is None when no labels
+            are supplied.
+        """
+        backbone_out = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            **kwargs,
+        )
+        hidden = backbone_out.last_hidden_state
+        token_logits = self.classifier(hidden)  # [B, N, H] => [B, N, C]
+        # NOTE: tokenizer.padding_side must be 'left' so that index -1 is a real token.
+        last_token_logits = token_logits[:, -1]
+        if labels is None:
+            loss = None
+        else:
+            loss = self.loss_function(
+                logits=token_logits,
+                labels=labels,
+                pooled_logits=last_token_logits,
+                config=self.config,
+            )
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=last_token_logits,
+            past_key_values=backbone_out.past_key_values,
+            hidden_states=backbone_out.hidden_states,
+            attentions=backbone_out.attentions,
+        )
+# The function for loading a fulltuning model
+def get_fulltuning_model(model_path, model_type="olmo"):
+    """Load a fully fine-tuned sequence classification model in eval mode.
+
+    Args:
+        model_path: Path or hub id passed to `from_pretrained`.
+        model_type: "olmo" loads OlmoForSequenceClassification; "pythia" loads
+            AutoModelForSequenceClassification with a config built with
+            num_labels=3.
+
+    Returns:
+        The float32 model, moved to the module-level `device` and switched to
+        evaluation mode.
+
+    Raises:
+        ValueError: If `model_type` is neither "olmo" nor "pythia".
+    """
+    if model_type == "olmo":
+        model = OlmoForSequenceClassification.from_pretrained(
+            model_path,
+            trust_remote_code=True,
+            torch_dtype=torch.float32,
+        )
+    elif model_type == "pythia":
+        cfg = AutoConfig.from_pretrained(model_path, num_labels=3)
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_path,
+            config=cfg,
+            torch_dtype=torch.float32,
+        )
+    else:
+        raise ValueError(f"Unsupported model_type: {model_type}")
+    # Device placement and eval mode are applied once for every model type so
+    # both branches behave the same (previously only the olmo branch called eval()).
+    model = model.to(device)
+    model.eval()
+    return model
+# The filter and context manager below are used to suppress a "missing or unexpected params" warning.
+# This warning is no reason for concern. It stems from the fact that the model is first loaded
+# without a classifier head, which is added afterwards.
+class DropLoadReport(logging.Filter):
+    """Logging filter that drops every record whose message contains 'LOAD REPORT'."""
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Return False for load-report records and True for everything else."""
+        message = record.getMessage()
+        is_load_report = "LOAD REPORT" in message
+        return not is_load_report
+@contextmanager
+def suppress_load_report_only():
+    """Temporarily attach a DropLoadReport filter to the transformers loggers.
+
+    The filter is installed on entry and always removed on exit, even when the
+    wrapped code raises.
+    """
+    load_report_filter = DropLoadReport()
+    targets = tuple(
+        logging.getLogger(logger_name)
+        for logger_name in (
+            "transformers.modeling_utils",
+            "transformers.modeling_tf_pytorch_utils",
+            "transformers",
+        )
+    )
+    for target in targets:
+        target.addFilter(load_report_filter)
+    try:
+        yield
+    finally:
+        # Detach the filter again so logging outside the block is unaffected.
+        for target in targets:
+            target.removeFilter(load_report_filter)
+# The function for loading a softprompt model
+def get_peft_model(model_path, model_type="olmo"):
+    """Load a PEFT adapter on top of its base sequence classification model.
+
+    Args:
+        model_path: Path or hub id of the PEFT adapter; the base model name is
+            read from its PeftConfig.
+        model_type: "olmo" uses OlmoForSequenceClassification (with
+            trust_remote_code=True); "pythia" uses
+            AutoModelForSequenceClassification.
+
+    Returns:
+        The PeftModel in float32, on CUDA when available (otherwise CPU), in
+        evaluation mode. The base config is built with num_labels=2.
+
+    Raises:
+        ValueError: If `model_type` is neither "olmo" nor "pythia".
+    """
+    peft_config = PeftConfig.from_pretrained(model_path)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    base_name = peft_config.base_model_name_or_path
+    # Choose the base model class and the extra loading kwargs for this model type.
+    if model_type == "olmo":
+        base_cls = OlmoForSequenceClassification
+        extra_kwargs = {"trust_remote_code": True}
+    elif model_type == "pythia":
+        base_cls = AutoModelForSequenceClassification
+        extra_kwargs = {}
+    else:
+        raise ValueError(f"Unsupported model_type: {model_type}")
+    config = AutoConfig.from_pretrained(base_name, num_labels=2, **extra_kwargs)
+    with suppress_load_report_only():
+        base = base_cls.from_pretrained(
+            base_name,
+            config=config,
+            torch_dtype=torch.float32,
+            **extra_kwargs,
+        ).to(device)
+    with suppress_load_report_only():
+        model = PeftModel.from_pretrained(base, model_path).to(device)
+    model.eval()
+    return model