File size: 1,382 Bytes
c01955c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import torch
from torch.utils.data import DataLoader,Dataset
from src.Predictors.constants import DEVICE
class ResumeDataset(Dataset):
    """Map-style dataset yielding tokenized resume text with float targets.

    Every row of ``data`` is converted once, at construction time, by
    ``prepare_input``; tokenization is deferred until an item is requested.

    NOTE(review): ``prepare_input`` and ``tokenize`` are neither defined nor
    imported in the visible part of this file -- confirm where they are
    supposed to come from.
    """

    def __init__(self, data):
        """Pre-process each row of ``data`` (any object exposing ``iterrows()``)."""
        prepared = []
        for _, row in data.iterrows():
            prepared.append(prepare_input(row))
        self.sample = prepared

    def __len__(self):
        """Return the number of prepared samples."""
        return len(self.sample)

    def __getitem__(self, idx):
        """Tokenize the sample at ``idx`` and package it as a dict of tensors."""
        text, labels = self.sample[idx]
        encoded = tokenize(text)

        # squeeze(0) removes a leading size-1 dimension; presumably the batch
        # axis added by the tokenizer -- TODO confirm against ``tokenize``.
        token_ids = encoded['input_ids'].squeeze(0)
        mask = encoded['attention_mask'].squeeze(0)
        target = torch.tensor(labels, dtype=torch.float)

        # NOTE(review): the key is "input_idx" while ResumeScore.forward takes
        # ``input_ids`` -- confirm the training loop maps between the two.
        return {
            "input_idx": token_ids,
            "attention_mask": mask,
            "labels": target,
        }
    


import torch.nn as nn
from transformers import AutoModel

class ResumeScore(nn.Module):
    """Pretrained transformer encoder with a small MLP regression head.

    The hidden state at sequence position 0 (the [CLS] token for BERT) is
    passed through a two-layer feed-forward head that emits ``num_outputs``
    values per input; the two default outputs are the macro and micro scores.

    Args:
        model_name: Checkpoint name handed to ``AutoModel.from_pretrained``.
        hidden_dim: Width of the intermediate layer of the regression head.
        dropout: Dropout probability used inside the regression head.
        num_outputs: Number of values predicted per input.
    """

    def __init__(
        self,
        *,
        model_name="bert-base-uncased",
        hidden_dim=256,
        dropout=0.2,
        num_outputs=2,
    ):
        super().__init__()

        self.bert = AutoModel.from_pretrained(model_name)

        # Take the encoder width from its config rather than hard-coding 768,
        # so the head stays correctly sized if another BERT-style checkpoint
        # is selected (bert-base-uncased still yields 768).
        encoder_dim = self.bert.config.hidden_size

        # Layer order is kept as Linear/ReLU/Dropout/Linear so parameter names
        # (regressor.0.*, regressor.3.*) match previously saved state dicts.
        self.regressor = nn.Sequential(
            nn.Linear(encoder_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_outputs),  # macro, micro by default
        )

    def forward(self, input_ids, attention_mask):
        """Encode the batch and regress scores from the first-token state.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            The regression head's output for each item in the batch.
        """
        # Keyword arguments avoid depending on the encoder's positional
        # parameter order, which is not guaranteed across architectures.
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Position 0 along the sequence axis is the [CLS] token.
        cls_output = outputs.last_hidden_state[:, 0]

        return self.regressor(cls_output)



# dataset=ResumeDataset(data=df)

# dataset_loader=DataLoader(dataset=dataset,batch_size=8,shuffle=True)


# optimizer=torch.optim.AdamW(model.parameters(),lr=2e-5)

# loss_fn=nn.MSELoss()