E-katrin committed
Commit 503e9ee · verified · 1 Parent(s): 7a48cdd

Upload ConlluTokenClassificationPipeline

Files changed (2)
  1. encoder.py +6 -9
  2. model.safetensors +2 -2
encoder.py CHANGED
@@ -2,7 +2,7 @@ import torch
 from torch import nn
 from torch import Tensor, LongTensor
 
-from transformers import AutoTokenizer, AutoModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 try:
     from peft import LoraConfig, get_peft_model
@@ -28,17 +28,13 @@ class WordTransformerEncoder(nn.Module):
     ):
         super().__init__()
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name)
+        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
         if use_lora:
             if not PEFT_AVAILABLE:
                 raise ImportError("peft is required for LoRA fine-tuning. Install with `pip install peft`.")
             if lora_target_modules is None:
-                # XLM-RoBERTa and RoBERTa-family
-                if "roberta" in model_name.lower():
-                    lora_target_modules = ["q_proj", "v_proj"]
-                else:
-                    lora_target_modules = ["query", "value"]
+                lora_target_modules = ["query", "value"]
             lora_config = LoraConfig(
                 r=lora_r,
                 lora_alpha=lora_alpha,
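A note on the simplified default above: Hugging Face BERT- and RoBERTa-family encoders (XLM-RoBERTa included) both name their attention projections "query"/"value", while "q_proj"/"v_proj" are the names used by LLaMA-style decoders, so the removed branch would have pointed LoRA at modules that don't exist in these encoders. A minimal sketch of the resulting setup, with an illustrative checkpoint and placeholder hyperparameters not taken from this commit:

from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

# Illustrative checkpoint; any BERT/RoBERTa-family encoder resolves the same
# "query"/"value" module names.
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")

lora_config = LoraConfig(
    r=8,                                # stands in for lora_r
    lora_alpha=16,                      # stands in for lora_alpha
    target_modules=["query", "value"],  # the new unconditional default
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()      # only adapter weights stay trainable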
@@ -88,7 +84,8 @@ class WordTransformerEncoder(nn.Module):
         ])
 
         # Run model and extract subtokens embeddings from the last layer.
-        subtokens_embeddings = self.model(**subtokens).last_hidden_state
+        outputs = self.model(**subtokens, output_hidden_states=True)
+        subtokens_embeddings = outputs.hidden_states[-1]
 
         # Aggregate subtokens embeddings into words embeddings.
         # [batch_size, n_words, embedding_size]
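The two-line replacement is needed because a *ForSequenceClassification model returns logits rather than last_hidden_state; requesting output_hidden_states=True exposes the encoder states, and hidden_states[-1] is the same tensor the bare AutoModel previously returned as last_hidden_state. A standalone sketch under the same assumptions (illustrative checkpoint):

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")  # illustrative
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")

subtokens = tokenizer(["an example sentence"], return_tensors="pt")
with torch.no_grad():
    outputs = model(**subtokens, output_hidden_states=True)

# hidden_states is a tuple of num_layers + 1 tensors (embedding output plus
# one per transformer layer); the last entry holds the final-layer states.
subtokens_embeddings = outputs.hidden_states[-1]
print(subtokens_embeddings.shape)  # [batch_size, n_subtokens, hidden_size]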
@@ -137,7 +134,7 @@ class WordTransformerEncoder(nn.Module):
 
     def get_embeddings_layer(self):
         """Returns the embeddings model."""
-        return self.model.embeddings
+        return self.model.roberta.embeddings
 
     def get_transformer_layers(self) -> list[nn.Module]:
         """
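The added .roberta hop reflects that AutoModelForSequenceClassification wraps the encoder under an architecture-specific attribute (.roberta here, .bert for BERT, and so on). A short sketch; model.base_model is the architecture-agnostic way to reach the same module:

from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base")  # illustrative

embeddings = model.roberta.embeddings  # what get_embeddings_layer() now returns

# base_model resolves the architecture prefix (model.base_model_prefix is
# "roberta" here), so this reaches the same module without hard-coding it:
assert model.base_model.embeddings is embeddings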
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37137a12604aa1ee98f7cc4627a3ed76c63cdb9eb9d2fd02c4a73aaf17325dae
-size 1134190536
+oid sha256:b27ba6b77caece9754d522ddc5bf9e63844a10a04e972b65961a9baccbb08bf5
+size 1134198480