---
{}
---

# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->

This model is a fine-tuned version of xlm-roberta-large with a mean-pooled, single-output head (see Direct Use below). The card itself is based on [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** catlove
- **Shared by [optional]:** catlove
- **Model type:** xlm-roberta
- **Language(s) (NLP):** multilingual
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** xlm-roberta-large

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

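The snippet below reconstructs the training configuration and model definition so that the saved checkpoint can be reloaded. One assumption is flagged inline: `MeanPooling` is not defined elsewhere in this card, so the standard attention-mask-weighted implementation is sketched in.
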
```python
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModel, AutoTokenizer


class MeanPooling(nn.Module):
    # Assumed implementation: standard attention-mask-weighted mean pooling
    # (this module is not defined elsewhere in the card).
    def forward(self, last_hidden_state, attention_mask):
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * mask, dim=1)
        sum_mask = mask.sum(dim=1).clamp(min=1e-9)
        return sum_embeddings / sum_mask


class CFG:
    # Training/inference configuration.
    print_freq = 500
    num_workers = 0
    model = "xlm-roberta-large"
    tokenizer = AutoTokenizer.from_pretrained(model)
    gradient_checkpointing = False
    num_cycles = 0.5
    warmup_ratio = 0.1
    epochs = 3
    encoder_lr = 1e-5
    decoder_lr = 1e-4
    eps = 1e-6
    betas = (0.9, 0.999)
    batch_size = 32
    weight_decay = 0.01
    max_grad_norm = 0.012
    max_len = 512
    n_folds = 5
    seed = 42


class custom_model(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
        # Disable all dropout in the backbone for more stable regression outputs.
        self.config.hidden_dropout = 0.0
        self.config.hidden_dropout_prob = 0.0
        self.config.attention_dropout = 0.0
        self.config.attention_probs_dropout_prob = 0.0
        self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()
        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, 1)
        self._init_weights(self.fc)

    def _init_weights(self, module):
        # Hugging Face-style initialization for the newly added head.
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, inputs):
        # Mean-pool the final hidden states over non-padding tokens.
        outputs = self.model(**inputs)
        last_hidden_state = outputs.last_hidden_state
        feature = self.pool(last_hidden_state, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        output = self.fc(feature)
        return output


model = custom_model(CFG)
# The checkpoint stores the state dict under the 'model' key.
model.load_state_dict(torch.load('./model_saved/custom_model_weights.pth')['model'])
```
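
The sketch below shows one way to score new text with the reloaded model. It is a minimal, hypothetical example: the input text, the keys passed to the model, and applying the reported 0.029 threshold (see Evaluation) directly to the raw score are assumptions, since the card does not specify the exact preprocessing or decision rule.

```python
# Hypothetical usage sketch, not the author's confirmed inference pipeline.
texts = ["An example sentence to score."]
enc = CFG.tokenizer(
    texts,
    max_length=CFG.max_len,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

model.eval()
with torch.no_grad():
    scores = model({"input_ids": enc["input_ids"],
                    "attention_mask": enc["attention_mask"]})

# Assumption: binarize raw scores at the threshold reported in Evaluation.
preds = (scores.squeeze(-1) > 0.029).long()
```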

## Evaluation

Our cross-validation (CV) score is 0.3797 at a prediction threshold of 0.029.