lucweber committed on
Commit
f84cc1f
·
verified ·
1 Parent(s): e346e0a

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +10 -18
model.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from typing import Optional
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
  import torch.nn as nn
6
 
@@ -88,34 +88,26 @@ class CausalLMForRegression(nn.Module):
88
 
89
  @classmethod
90
  def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
91
-
92
  kwargs.setdefault("output_hidden_states", True)
93
 
94
- base_model = AutoModelForCausalLM.from_pretrained(
95
- pretrained_model_name_or_path,
96
- *model_args,
97
- **kwargs
98
  )
99
 
100
- # Create an uninitialized instance of CausalLMForRegression
101
  instance = cls.__new__(cls)
102
  nn.Module.__init__(instance)
103
-
104
  instance.model = base_model
105
- instance.regression_head = nn.Linear(
106
- base_model.config.hidden_size, 1
107
- )
108
- instance._keys_to_ignore_on_save = []
109
 
110
- # Load the regression head separately
111
- head_path = os.path.join(
112
- pretrained_model_name_or_path, "regression_head.bin"
113
- )
114
  if os.path.exists(head_path):
115
- state = torch.load(head_path, map_location="cpu")
116
- instance.regression_head.load_state_dict(state)
 
117
  else:
118
  print("No regression head found – initialising randomly.")
 
119
  return instance
120
 
121
  @torch.no_grad()
 
1
  import os
2
  from typing import Optional
3
+ from transformers import Qwen3ForCausalLM, AutoTokenizer
4
  import torch
5
  import torch.nn as nn
6
 
 
88
 
89
  @classmethod
90
  def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
91
+ # make sure hidden states are returned
92
  kwargs.setdefault("output_hidden_states", True)
93
 
94
+ base_model = Qwen3ForCausalLM.from_pretrained(
95
+ pretrained_model_name_or_path, *model_args, **kwargs
 
 
96
  )
97
 
 
98
  instance = cls.__new__(cls)
99
  nn.Module.__init__(instance)
 
100
  instance.model = base_model
101
+ instance.regression_head = nn.Linear(base_model.config.hidden_size, 1)
 
 
 
102
 
103
+ head_path = os.path.join(pretrained_model_name_or_path, "regression_head.bin")
 
 
 
104
  if os.path.exists(head_path):
105
+ instance.regression_head.load_state_dict(
106
+ torch.load(head_path, map_location="cpu")
107
+ )
108
  else:
109
  print("No regression head found – initialising randomly.")
110
+ instance._keys_to_ignore_on_save = []
111
  return instance
112
 
113
  @torch.no_grad()