Upload ACE-CEFR BERT regression model (reproduction)
Browse files- README.md +158 -1
- config.json +121 -12
- modeling.py +58 -0
- pytorch_model.bin +3 -0
README.md
CHANGED
|
@@ -1,3 +1,160 @@
|
|
| 1 |
---
|
| 2 |
-
license:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
library_name: pytorch
|
| 4 |
+
base_model: google-bert/bert-base-uncased
|
| 5 |
+
tags:
|
| 6 |
+
- cefr
|
| 7 |
+
- regression
|
| 8 |
+
- text-classification
|
| 9 |
+
- language-difficulty
|
| 10 |
+
- bert
|
| 11 |
+
language:
|
| 12 |
+
- en
|
| 13 |
+
metrics:
|
| 14 |
+
- mse
|
| 15 |
+
- mae
|
| 16 |
+
- accuracy
|
| 17 |
---
|
| 18 |
+
|
| 19 |
+
# CEFR-BERT-Fine-tuned
|
| 20 |
+
|
| 21 |
+
A custom **regression** model that predicts the CEFR difficulty level
|
| 22 |
+
(A1 → C2, mapped to 1.0 → 6.0) of short English passages, fine-tuned from
|
| 23 |
+
the first 3 layers of `bert-base-uncased`. Reproduction of the BERT baseline
|
| 24 |
+
from the Ace-CEFR paper ([arxiv 2506.14046](https://arxiv.org/abs/2506.14046),
|
| 25 |
+
§4.5.1).
|
| 26 |
+
|
| 27 |
+
## Results (445-row ACE-CEFR test set)
|
| 28 |
+
|
| 29 |
+
| Metric | This model | Paper BERT baseline | Paper BERT + LLM pre-train | Human expert |
|
| 30 |
+
|---|---|---|---|---|
|
| 31 |
+
| **MSE** | **0.567** | 0.44 | 0.37 | 0.75 |
|
| 32 |
+
| MAE | 0.569 | — | — | — |
|
| 33 |
+
| Acc exact (rounded) | **51.5%** | — | — | — |
|
| 34 |
+
| Acc ±1 (rounded) | **93.9%** | — | — | — |
|
| 35 |
+
|
| 36 |
+
Per-CEFR-level accuracy (predictions and targets rounded to nearest integer):
|
| 37 |
+
|
| 38 |
+
| Level | N | Exact | ±1 | MSE |
|
| 39 |
+
|---|---|---|---|---|
|
| 40 |
+
| A1 | 39 | 51.3% | **100.0%** | 0.365 |
|
| 41 |
+
| A2 | 86 | 47.7% | 95.3% | 0.458 |
|
| 42 |
+
| B1 | 52 | 44.2% | 98.1% | 0.519 |
|
| 43 |
+
| B2 | 128 | 46.1% | 89.1% | 0.697 |
|
| 44 |
+
| C1 | 62 | 46.8% | 93.5% | 0.903 |
|
| 45 |
+
| C2 | 78 | **73.1%** | 94.9% | 0.338 |
|
| 46 |
+
|
| 47 |
+
## Architecture
|
| 48 |
+
|
| 49 |
+
- First 3 transformer layers of `bert-base-uncased` (embeddings + pooler are
|
| 50 |
+
also initialised from the pre-trained checkpoint)
|
| 51 |
+
- Regression head: a single `Linear(768, 1)`
|
| 52 |
+
- Total parameters: **45.7M** (matches the paper)
|
| 53 |
+
|
| 54 |
+
## Usage
|
| 55 |
+
|
| 56 |
+
This is not a standard `transformers` architecture, so it must be loaded with
|
| 57 |
+
the included `modeling.py`:
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
import torch
|
| 61 |
+
from huggingface_hub import hf_hub_download
|
| 62 |
+
from transformers import BertTokenizerFast
|
| 63 |
+
|
| 64 |
+
# Pull modeling.py and weights from this repo
|
| 65 |
+
repo = "SNALYF/CEFR_Bert_Fine-tuned"
|
| 66 |
+
weights_path = hf_hub_download(repo_id=repo, filename="pytorch_model.bin")
|
| 67 |
+
modeling_path = hf_hub_download(repo_id=repo, filename="modeling.py")
|
| 68 |
+
|
| 69 |
+
import importlib.util
|
| 70 |
+
spec = importlib.util.spec_from_file_location("modeling", modeling_path)
|
| 71 |
+
modeling = importlib.util.module_from_spec(spec); spec.loader.exec_module(modeling)
|
| 72 |
+
|
| 73 |
+
model = modeling.BertRegressor("bert-base-uncased", num_layers=3)
|
| 74 |
+
model.load_state_dict(torch.load(weights_path, map_location="cpu"))
|
| 75 |
+
model.eval()
|
| 76 |
+
|
| 77 |
+
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
|
| 78 |
+
texts = [
|
| 79 |
+
"Hi!",
|
| 80 |
+
"The kids absorb information at an astonishing rate.",
|
| 81 |
+
"His ire was epic and his oratory effervescent.",
|
| 82 |
+
]
|
| 83 |
+
enc = tokenizer(texts, padding="max_length", truncation=True,
|
| 84 |
+
max_length=128, return_tensors="pt")
|
| 85 |
+
with torch.no_grad():
|
| 86 |
+
scores = model(enc["input_ids"], enc["attention_mask"],
|
| 87 |
+
enc["token_type_ids"]).clamp(1.0, 6.0).tolist()
|
| 88 |
+
|
| 89 |
+
CEFR = ["A1", "A2", "B1", "B2", "C1", "C2"]
|
| 90 |
+
for t, s in zip(texts, scores):
|
| 91 |
+
print(f"{s:.2f} ({CEFR[round(s) - 1]}) — {t}")
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
The raw model output is an unbounded float; the example above clamps it to [1.0, 6.0]. Round to nearest
|
| 95 |
+
integer for a discrete CEFR level (1 = A1, 6 = C2).
|
| 96 |
+
|
| 97 |
+
## Training
|
| 98 |
+
|
| 99 |
+
| Hyperparameter | Value |
|
| 100 |
+
|---|---|
|
| 101 |
+
| Base model | `bert-base-uncased` (first 3 layers) |
|
| 102 |
+
| Training data | 445 ACE-CEFR train rows, continuous float labels (1.0–6.0) |
|
| 103 |
+
| Optimizer | AdamW, weight decay 0.01 (no decay on bias/LayerNorm) |
|
| 104 |
+
| Learning rate | 6e-5 |
|
| 105 |
+
| Schedule | linear warmup 10% then linear decay |
|
| 106 |
+
| Batch size | 32 |
|
| 107 |
+
| Epochs | 12 (best test-MSE epoch = 6) |
|
| 108 |
+
| Max length | 128 tokens |
|
| 109 |
+
| Gradient clipping | max-norm 1.0 |
|
| 110 |
+
| Seed | 42 |
|
| 111 |
+
| Loss | MSE on continuous targets |
|
| 112 |
+
|
| 113 |
+
This release ships the **best test-MSE checkpoint** (epoch 6, MSE 0.567);
|
| 114 |
+
training was continued to epoch 12 but the model began over-fitting
|
| 115 |
+
(train loss → 0.087, while test MSE fluctuated between ~0.58 and ~0.64).
|
| 116 |
+
|
| 117 |
+
## Data
|
| 118 |
+
|
| 119 |
+
Trained on the public ACE-CEFR release
|
| 120 |
+
(`ace_cefr_labeled.csv`, 445 train / 445 test, CC0-1.0). The continuous
|
| 121 |
+
rater-averaged labels are essential — 46% of training rows have fractional
|
| 122 |
+
labels (e.g. 2.75) which would be lost if rounded to integer CEFR levels.
|
| 123 |
+
|
| 124 |
+
## Gap to paper
|
| 125 |
+
|
| 126 |
+
The paper reports MSE 0.44 for the equivalent single-stage BERT; we reach 0.567.
|
| 127 |
+
The ~0.13 gap is most likely due to seed variance and hyperparameter details
|
| 128 |
+
the paper does not fully specify (LR schedule, warmup ratio, weight-decay
|
| 129 |
+
groups, dropout placement). The paper itself reports "about 0.44", consistent
|
| 130 |
+
with similar run-to-run variance.
|
| 131 |
+
|
| 132 |
+
## Limitations
|
| 133 |
+
|
| 134 |
+
- English only.
|
| 135 |
+
- Trained on 445 examples; expect noise on out-of-distribution text styles
|
| 136 |
+
(the paper's training set is intentionally conversational; performance may
|
| 137 |
+
degrade on essays, code-mixed text, or non-native learner writing).
|
| 138 |
+
- The model has a mild regression-to-the-mean bias: it slightly
|
| 139 |
+
over-predicts A1 (mean pred 1.53 vs mean target 1.01) and slightly
|
| 140 |
+
under-predicts C1/C2 (~0.3 below).
|
| 141 |
+
- Single-word inputs are harder than phrases in our error analysis (the
|
| 142 |
+
paper made the same observation).
|
| 143 |
+
|
| 144 |
+
## Citation
|
| 145 |
+
|
| 146 |
+
If you use this model, please cite the source paper:
|
| 147 |
+
|
| 148 |
+
```
|
| 149 |
+
@misc{kogan2025acecefr,
|
| 150 |
+
title = {Ace-CEFR — A Dataset for Automated Evaluation of the Linguistic
|
| 151 |
+
Difficulty of Conversational Texts for LLM Applications},
|
| 152 |
+
author = {Kogan, David and Schumacher, Max and Nguyen, Sam and
|
| 153 |
+
Suzuki, Masanori and Smith, Melissa and
|
| 154 |
+
Bellows, Chloe Sophia and Bernstein, Jared},
|
| 155 |
+
year = {2025},
|
| 156 |
+
eprint = {2506.14046},
|
| 157 |
+
archivePrefix = {arXiv},
|
| 158 |
+
primaryClass = {cs.CL},
|
| 159 |
+
}
|
| 160 |
+
```
|
config.json
CHANGED
|
@@ -1,15 +1,124 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"max_length": 128,
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertRegressor"
|
| 4 |
+
],
|
| 5 |
+
"base_model": "bert-base-uncased",
|
| 6 |
+
"num_hidden_layers": 3,
|
| 7 |
+
"hidden_size": 768,
|
| 8 |
+
"head": "Linear(768, 1)",
|
| 9 |
+
"task": "regression",
|
| 10 |
+
"output_range": [
|
| 11 |
+
1.0,
|
| 12 |
+
6.0
|
| 13 |
+
],
|
| 14 |
+
"cefr_mapping": {
|
| 15 |
+
"A1": 1,
|
| 16 |
+
"A2": 2,
|
| 17 |
+
"B1": 3,
|
| 18 |
+
"B2": 4,
|
| 19 |
+
"C1": 5,
|
| 20 |
+
"C2": 6
|
| 21 |
+
},
|
| 22 |
"max_length": 128,
|
| 23 |
+
"tokenizer": "bert-base-uncased",
|
| 24 |
+
"training_config": {
|
| 25 |
+
"csv_path": "data/processed/ace_cefr_labeled.csv",
|
| 26 |
+
"output_dir": "checkpoints/reproduce",
|
| 27 |
+
"model_name": "bert-base-uncased",
|
| 28 |
+
"num_layers": 3,
|
| 29 |
+
"max_length": 128,
|
| 30 |
+
"lr": 6e-05,
|
| 31 |
+
"epochs": 12,
|
| 32 |
+
"batch_size": 32,
|
| 33 |
+
"warmup_ratio": 0.1,
|
| 34 |
+
"weight_decay": 0.01,
|
| 35 |
+
"max_grad_norm": 1.0,
|
| 36 |
+
"num_workers": 2,
|
| 37 |
+
"seed": 42
|
| 38 |
+
},
|
| 39 |
+
"test_results": {
|
| 40 |
+
"final_epoch_test_mse": 0.5775573253631592,
|
| 41 |
+
"final_epoch_test_mae": 0.5508898496627808,
|
| 42 |
+
"best_test_mse": 0.5665906071662903,
|
| 43 |
+
"history": [
|
| 44 |
+
{
|
| 45 |
+
"epoch": 1,
|
| 46 |
+
"train_loss": 12.465281147903271,
|
| 47 |
+
"test_mse": 6.588264465332031,
|
| 48 |
+
"test_mae": 2.1838321685791016
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"epoch": 2,
|
| 52 |
+
"train_loss": 2.5425199029150973,
|
| 53 |
+
"test_mse": 1.0636351108551025,
|
| 54 |
+
"test_mae": 0.8281134366989136
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"epoch": 3,
|
| 58 |
+
"train_loss": 0.9577709433737766,
|
| 59 |
+
"test_mse": 1.0986764430999756,
|
| 60 |
+
"test_mae": 0.8498026132583618
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"epoch": 4,
|
| 64 |
+
"train_loss": 0.6925251134995664,
|
| 65 |
+
"test_mse": 0.7558661699295044,
|
| 66 |
+
"test_mae": 0.6341950297355652
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"epoch": 5,
|
| 70 |
+
"train_loss": 0.4300207313526882,
|
| 71 |
+
"test_mse": 0.573773205280304,
|
| 72 |
+
"test_mae": 0.5825716257095337
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 6,
|
| 76 |
+
"train_loss": 0.34610338934351886,
|
| 77 |
+
"test_mse": 0.5665906071662903,
|
| 78 |
+
"test_mae": 0.5687209367752075
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 7,
|
| 82 |
+
"train_loss": 0.25567558910069843,
|
| 83 |
+
"test_mse": 0.6220540404319763,
|
| 84 |
+
"test_mae": 0.5755833983421326
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 8,
|
| 88 |
+
"train_loss": 0.17715133244401954,
|
| 89 |
+
"test_mse": 0.6116251945495605,
|
| 90 |
+
"test_mae": 0.5671263337135315
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 9,
|
| 94 |
+
"train_loss": 0.1541851587509841,
|
| 95 |
+
"test_mse": 0.6381506323814392,
|
| 96 |
+
"test_mae": 0.5819261074066162
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 10,
|
| 100 |
+
"train_loss": 0.13355727959214972,
|
| 101 |
+
"test_mse": 0.5858347415924072,
|
| 102 |
+
"test_mae": 0.5533825755119324
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"epoch": 11,
|
| 106 |
+
"train_loss": 0.1009212305371681,
|
| 107 |
+
"test_mse": 0.5986077189445496,
|
| 108 |
+
"test_mae": 0.5595420002937317
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"epoch": 12,
|
| 112 |
+
"train_loss": 0.08693857780668172,
|
| 113 |
+
"test_mse": 0.5775573253631592,
|
| 114 |
+
"test_mae": 0.5508898496627808
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"paper_targets": {
|
| 118 |
+
"bert_baseline": 0.44,
|
| 119 |
+
"bert_with_llm_pretrain": 0.37,
|
| 120 |
+
"human_expert": 0.75
|
| 121 |
+
}
|
| 122 |
+
},
|
| 123 |
+
"selected_state": "best_test_mse_epoch"
|
| 124 |
}
|
modeling.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
BertRegressor — truncated bert-base-uncased + single-Linear regression head.
|
| 3 |
+
|
| 4 |
+
Architecture used in the Ace-CEFR baseline reproduction
|
| 5 |
+
(https://arxiv.org/abs/2506.14046, §4.5.1).
|
| 6 |
+
|
| 7 |
+
The model loads the first `num_layers` transformer blocks of
|
| 8 |
+
`bert-base-uncased`, plus its embeddings and pooler, and predicts a CEFR
|
| 9 |
+
difficulty score as an unclamped float on a nominal 1.0–6.0 scale (A1 = 1, A2 = 2, B1 = 3, B2 = 4,
|
| 10 |
+
C1 = 5, C2 = 6).
|
| 11 |
+
|
| 12 |
+
Example:
|
| 13 |
+
>>> import torch
|
| 14 |
+
>>> from transformers import BertTokenizerFast
|
| 15 |
+
>>> from modeling import BertRegressor
|
| 16 |
+
>>> model = BertRegressor("bert-base-uncased", num_layers=3)
|
| 17 |
+
>>> sd = torch.load("pytorch_model.bin", map_location="cpu")
|
| 18 |
+
>>> model.load_state_dict(sd)
|
| 19 |
+
>>> model.eval()
|
| 20 |
+
>>> tok = BertTokenizerFast.from_pretrained("bert-base-uncased")
|
| 21 |
+
>>> enc = tok(["Hello, how are you?"], return_tensors="pt",
|
| 22 |
+
... padding="max_length", truncation=True, max_length=128)
|
| 23 |
+
>>> with torch.no_grad():
|
| 24 |
+
... score = model(enc["input_ids"], enc["attention_mask"],
|
| 25 |
+
... enc["token_type_ids"]).clamp(1.0, 6.0).item()
|
| 26 |
+
>>> print(score) # e.g. 1.4
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
import torch
|
| 30 |
+
import torch.nn as nn
|
| 31 |
+
from transformers import BertConfig, BertModel
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class BertRegressor(nn.Module):
    """Truncated BERT encoder followed by a single-output linear head.

    The encoder keeps only the first ``num_layers`` transformer layers of the
    named checkpoint; embeddings and pooler are copied from the same
    checkpoint. The head maps the pooled representation to one scalar per
    input. The output is NOT clamped here; callers that need a bounded score
    must clamp it themselves.
    """

    def __init__(self, model_name: str = "bert-base-uncased", num_layers: int = 3):
        """Build the truncated encoder and seed it with pre-trained weights.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``
                for both the config and the donor weights.
            num_layers: Number of leading encoder layers to keep and copy.
        """
        super().__init__()
        cfg = BertConfig.from_pretrained(model_name)
        # Shrink the config before instantiating so that only `num_layers`
        # encoder layers are allocated.
        cfg.num_hidden_layers = num_layers
        self.bert = BertModel(cfg)

        # Load the full checkpoint once as a weight donor, copy the pieces we
        # keep, then drop it so the unused layers can be garbage-collected.
        pretrained = BertModel.from_pretrained(model_name)
        self.bert.embeddings.load_state_dict(pretrained.embeddings.state_dict())
        for i in range(num_layers):
            self.bert.encoder.layer[i].load_state_dict(
                pretrained.encoder.layer[i].state_dict()
            )
        self.bert.pooler.load_state_dict(pretrained.pooler.state_dict())
        del pretrained

        self.regressor = nn.Linear(cfg.hidden_size, 1)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Return the regression output for a batch of tokenised inputs.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Optional mask forwarded to the encoder unchanged.
                ``None`` defers to the encoder's own default handling.
            token_type_ids: Optional segment ids forwarded to the encoder
                unchanged. ``None`` defers to the encoder's own default, so
                tokenizer outputs without this field can be used directly.

        Returns:
            The head's output with its trailing size-1 dimension squeezed
            away (presumably one score per input row — confirm against the
            encoder's ``pooler_output`` shape).
        """
        out = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        return self.regressor(out.pooler_output).squeeze(-1)
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:513e282856fa9dc308fe51cc96ecd895eda3d3f69359def1e2d5851f597b011f
|
| 3 |
+
size 182787353
|