Upload DisamBert

- DisamBert.py +65 -64
- model.safetensors +1 -1

DisamBert.py
CHANGED
@@ -30,9 +30,10 @@ class DisamBert(PreTrainedModel):
 class DisamBert(PreTrainedModel):
     def __init__(self, config:PreTrainedConfig):
         super().__init__(config)
-        self.BaseModel = AutoModel.from_pretrained(config.name_or_path)
+        self.BaseModel = AutoModel.from_pretrained(config.name_or_path,device_map="auto")
         self.tokenizer = AutoTokenizer.from_pretrained(config.name_or_path)
-        self.…
+        with self.BaseModel.device:
+            self.classifier_head = nn.UninitializedParameter()
         self.__entities = None
 
     @classmethod
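This hunk swaps manual CUDA placement for device_map="auto" plus a device context. A minimal standalone sketch of the same pattern, assuming torch >= 2.0 (where a torch.device works as a context manager) and a stand-in checkpoint name; the real model reads config.name_or_path:

from torch import nn
from transformers import AutoModel, AutoTokenizer

# "bert-base-uncased" is a placeholder checkpoint, not this repo's model.
# device_map="auto" needs the accelerate package installed.
base_model = AutoModel.from_pretrained("bert-base-uncased", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tensors created inside the block land on the model's device,
# replacing the explicit .to("cuda") calls removed elsewhere in this commit.
with base_model.device:
    # Lazy parameter: its shape is materialized later, once the
    # entity vectors that form the classifier head are computed.
    classifier_head = nn.UninitializedParameter()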
@@ -45,29 +46,30 @@ class DisamBert(PreTrainedModel):
         vectors = []
         batch = []
         n = 0
-        […6 removed lines truncated in the diff view…]
+        with self.BaseModel.device:
+            for entity in entities:
+                entity_ids.append(entity.concept)
+                batch.append(entity.definition)
+
+                n += 1
+                if n == BATCH_SIZE:
+                    tokens = self.tokenizer(batch, padding=True, return_tensors="pt")
+                    encoding = self.BaseModel(
+                        tokens["input_ids"], tokens["attention_mask"]
+                    )
+                    vectors.append(encoding.last_hidden_state.detach()[:, 0])
+                    n = 0
+                    batch = []
+            if n > 0:
                 tokens = self.tokenizer(batch, padding=True, return_tensors="pt")
                 encoding = self.BaseModel(
-                    tokens["input_ids"]
+                    tokens["input_ids"], tokens["attention_mask"]
                 )
                 vectors.append(encoding.last_hidden_state.detach()[:, 0])
-        […4 removed lines truncated in the diff view…]
-        encoding = self.BaseModel(
-            tokens["input_ids"].to("cuda"), tokens["attention_mask"].to("cuda")
-        )
-        vectors.append(encoding.last_hidden_state.detach()[:, 0])
-
-        self.__entities = pd.Series(entity_ids)
-        self.config.entities = entity_ids
-        self.classifier_head = nn.Parameter(torch.cat(vectors, dim=0))
+
+        self.__entities = pd.Series(entity_ids)
+        self.config.entities = entity_ids
+        self.classifier_head = nn.Parameter(torch.cat(vectors, dim=0))
 
     @property
     def entities(self) -> pd.Series:
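In effect, this hunk encodes each entity definition in batches and keeps the [CLS] vector (last_hidden_state[:, 0]) as one classifier-head row per entity. A self-contained sketch of that idea, with "bert-base-uncased" again standing in for the real checkpoint and toy definitions:

import torch
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained("bert-base-uncased")  # placeholder checkpoint
tok = AutoTokenizer.from_pretrained("bert-base-uncased")

definitions = ["a financial institution", "the land alongside a river"]
batch = tok(definitions, padding=True, return_tensors="pt")
with torch.no_grad():
    out = model(batch["input_ids"], batch["attention_mask"])
# One [CLS] vector per definition: shape (num_entities, hidden_size).
cls_vectors = out.last_hidden_state[:, 0]
classifier_head = torch.nn.Parameter(cls_vectors)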
@@ -81,45 +83,45 @@ class DisamBert(PreTrainedModel):
         )
         all_indices = []
         all_tokens = []
-        […39 removed lines truncated in the diff view…]
+        with self.BaseModel.device:
+            for sentence, span_indices in zip(sentences, indices, strict=True):
+                indices = []
+                tokens = []
+                last_span = len(span_indices) - 2
+                for i, position in enumerate(span_indices[:-1]):
+                    span = sentence[position : span_indices[i + 1]]
+                    span_tokens = self.tokenizer([span], padding=False)["input_ids"][0]
+                    if i > 0:
+                        span_tokens = span_tokens[1:]
+                    if i < last_span:
+                        span_tokens = span_tokens[:-1]
+                    indices.append(len(span_tokens))
+                    tokens.extend(span_tokens)
+                all_indices.append(indices)
+                all_tokens.append(tokens)
+            sentence_lengths = [len(boundaries) for boundaries in all_indices]
+            maxlen = max(sentence_lengths)
+            batch = self.pad(all_tokens)
+            token_vectors = self.BaseModel(batch.input_ids, batch.attention_mask).last_hidden_state
+            span_vectors = torch.cat(
+                [
+                    torch.vstack(
+                        [
+                            torch.sum(chunk, dim=0)
+                            for chunk in self.split(token_vectors[i], sentence_indices)
+                        ]
+                    )
+                    for (i, sentence_indices) in enumerate(all_indices)
+                ]
+            )
+            logits = torch.einsum("ij,kj->ki", span_vectors, self.classifier_head)
+            split_logits = torch.split(logits, sentence_lengths, dim=1)
+            return torch.stack(
+                [
+                    self.extend_to_max_length(sentence, length, maxlen)
+                    for (sentence, length) in zip(split_logits, sentence_lengths, strict=True)
+                ]
+            )
 
     def split(self, vectors: torch.Tensor, lengths: list[int]) -> tuple[torch.Tensor, ...]:
         maxlen = vectors.shape[0]
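The scoring step at the end of this hunk is an all-pairs dot product between span vectors and the classifier head. A self-contained sketch with made-up sizes, showing that the einsum is equivalent to a plain matrix product:

import torch

span_vectors = torch.randn(5, 768)       # i = 5 spans, j = 768 hidden dims
classifier_head = torch.randn(100, 768)  # k = 100 entities, j = 768 hidden dims
# output[k, i] = sum_j span_vectors[i, j] * classifier_head[k, j]
logits = torch.einsum("ij,kj->ki", span_vectors, classifier_head)
assert logits.shape == (100, 5)
# Same result as classifier_head @ span_vectors.T
assert torch.allclose(logits, classifier_head @ span_vectors.T, atol=1e-5)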
@@ -135,13 +137,12 @@ class DisamBert(PreTrainedModel):
             [
                 sentence + [self.config.pad_token_id] * (maxlen - length)
                 for (sentence, length) in zip(tokens, lengths, strict=True)
-            ],
-            device="cuda",
+            ]
         )
         attention_mask = torch.vstack(
             [
                 torch.cat(
-                    (torch.ones(length…
+                    (torch.ones(length), torch.zeros(maxlen - length))
                 )
                 for length in lengths
             ]
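What pad builds, sketched with toy token ids; pad id 0 is an assumption here, while the real code uses self.config.pad_token_id:

import torch

tokens = [[101, 7592, 102], [101, 2088, 2003, 2307, 102]]
lengths = [len(t) for t in tokens]
maxlen = max(lengths)
pad_id = 0  # assumed; the model reads self.config.pad_token_id

# Right-pad each sentence to the longest one in the batch.
input_ids = torch.tensor(
    [t + [pad_id] * (maxlen - n) for t, n in zip(tokens, lengths, strict=True)]
)
# Mask is 1 over real tokens and 0 over padding.
attention_mask = torch.vstack(
    [torch.cat((torch.ones(n), torch.zeros(maxlen - n))) for n in lengths]
)
# input_ids and attention_mask are both (2, 5).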
@@ -155,7 +156,7 @@ class DisamBert(PreTrainedModel):
                 torch.cat(
                     [
                         sentence,
-                        torch.zeros((self.__entities.shape[0], maxlength - length)…
+                        torch.zeros((self.__entities.shape[0], maxlength - length)),
                     ],
                     dim=1,
                 )
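And the padding this last hunk fixes, sketched: each per-sentence logit block of shape (num_entities, length) gains zero columns up to maxlength so all sentences can be stacked into one batch tensor. Sizes below are illustrative:

import torch

num_entities, length, maxlength = 4, 3, 5
sentence_logits = torch.randn(num_entities, length)
# Append zero columns so every sentence reaches maxlength spans.
padded = torch.cat(
    [sentence_logits, torch.zeros((num_entities, maxlength - length))],
    dim=1,
)
assert padded.shape == (num_entities, maxlength)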
model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:5d2a927c475b82fe97cb22c4f9e8367a186e66d17a7716fd6fd231d190684f5d
 size 957523088