Upload ConlluTokenClassificationPipeline

- encoder.py (+6 -9)
- model.safetensors (+2 -2)

encoder.py
CHANGED
@@ -2,7 +2,7 @@ import torch
 from torch import nn
 from torch import Tensor, LongTensor
 
-from transformers import AutoTokenizer,
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 try:
     from peft import LoraConfig, get_peft_model
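
The `try` import above pairs with the `PEFT_AVAILABLE` flag checked later in `__init__`. A minimal sketch of that optional-dependency guard, assuming the `except` branch the hunk cuts off before:

try:
    from peft import LoraConfig, get_peft_model
    PEFT_AVAILABLE = True  # assumed flag name, taken from the check in __init__
except ImportError:
    PEFT_AVAILABLE = False
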
@@ -28,17 +28,13 @@ class WordTransformerEncoder(nn.Module):
     ):
         super().__init__()
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model =
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
         if use_lora:
             if not PEFT_AVAILABLE:
                 raise ImportError("peft is required for LoRA fine-tuning. Install with `pip install peft`.")
             if lora_target_modules is None:
-
-                if "roberta" in model_name.lower():
-                    lora_target_modules = ["q_proj", "v_proj"]
-                else:
-                    lora_target_modules = ["query", "value"]
+                lora_target_modules = ["query", "value"]
             lora_config = LoraConfig(
                 r=lora_r,
                 lora_alpha=lora_alpha,
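
The removed branch targeted `q_proj`/`v_proj`, which are the attention projection names in LLaMA-style decoders; Hugging Face BERT- and RoBERTa-style encoders name those modules `query` and `value`, so hard-coding `["query", "value"]` reads as a fix rather than just a simplification. A minimal sketch of how the wrapping presumably completes, with assumed values for `r` and `lora_alpha` and an assumed final `get_peft_model` call, since the hunk ends at `lora_alpha`:

from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

model = AutoModelForSequenceClassification.from_pretrained("roberta-base")
lora_config = LoraConfig(
    r=8,                                # assumed rank; the class receives it as lora_r
    lora_alpha=16,                      # assumed scaling; passed as lora_alpha
    target_modules=["query", "value"],  # the new hard-coded default
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()      # reports trainable vs. total parameter counts
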
@@ -88,7 +84,8 @@ class WordTransformerEncoder(nn.Module):
         ])
 
         # Run model and extract subtokens embeddings from the last layer.
-
+        outputs = self.model(**subtokens, output_hidden_states=True)
+        subtokens_embeddings = outputs.hidden_states[-1]
 
         # Aggregate subtokens embeddings into words embeddings.
         # [batch_size, n_words, embedding_size]
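
Classification-head models return a `SequenceClassifierOutput` with no `last_hidden_state` field, which is presumably why the new code requests `output_hidden_states=True` and indexes `hidden_states[-1]`: the tuple holds the embedding output plus one tensor per layer, so `[-1]` is the final encoder layer. A standalone sketch of the same extraction, with a placeholder model name and input:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("roberta-base")

subtokens = tokenizer(["The cat sat on the mat."], return_tensors="pt")
with torch.no_grad():
    outputs = model(**subtokens, output_hidden_states=True)

# hidden_states is a tuple of (n_layers + 1) tensors; the last entry is the
# final encoder layer: [batch_size, n_subtokens, hidden_size]
subtokens_embeddings = outputs.hidden_states[-1]
print(subtokens_embeddings.shape)  # e.g. torch.Size([1, 9, 768])
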
@@ -137,7 +134,7 @@ class WordTransformerEncoder(nn.Module):
 
     def get_embeddings_layer(self):
         """Returns the embeddings model."""
-        return self.model.embeddings
+        return self.model.roberta.embeddings
 
     def get_transformer_layers(self) -> list[nn.Module]:
         """
model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b27ba6b77caece9754d522ddc5bf9e63844a10a04e972b65961a9baccbb08bf5
+size 1134198480