Upload model
Browse files
- config.json +4 -1
- model.safetensors +1 -1
- modeling_CustomLEDForQA.py +13 -2
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"activation_dropout": 0.0,
|
| 4 |
"activation_function": "gelu",
|
| 5 |
"architectures": [
|
|
@@ -14,6 +14,9 @@
|
|
| 14 |
1024,
|
| 15 |
1024
|
| 16 |
],
|
|
|
|
|
|
|
|
|
|
| 17 |
"bos_token_id": 0,
|
| 18 |
"classif_dropout": 0.0,
|
| 19 |
"classifier_dropout": 0.0,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "allenai/led-base-16384",
|
| 3 |
"activation_dropout": 0.0,
|
| 4 |
"activation_function": "gelu",
|
| 5 |
"architectures": [
|
|
|
|
| 14 |
1024,
|
| 15 |
1024
|
| 16 |
],
|
| 17 |
+
"auto_map": {
|
| 18 |
+
"AutoModel": "modeling_CustomLEDForQA.CustomLEDForQAModel"
|
| 19 |
+
},
|
| 20 |
"bos_token_id": 0,
|
| 21 |
"classif_dropout": 0.0,
|
| 22 |
"classifier_dropout": 0.0,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 417405656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7de0c365541e0d36d3fe80cb356e4da80915c6c57c12e9caff204628517f478a
|
| 3 |
size 417405656
|
modeling_CustomLEDForQA.py
CHANGED
|
@@ -29,12 +29,23 @@ class CustomLEDForQAModel(LEDPreTrainedModel):
|
|
| 29 |
end_logits = end_logits.squeeze(-1).contiguous()
|
| 30 |
|
| 31 |
total_loss = None
|
|
|
|
| 32 |
if start_positions is not None and end_positions is not None:
|
| 33 |
|
| 34 |
loss_fct = nn.CrossEntropyLoss()
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
total_loss = (start_loss + end_loss) / 2
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
return {
|
|
|
|
| 29 |
end_logits = end_logits.squeeze(-1).contiguous()
|
| 30 |
|
| 31 |
total_loss = None
|
| 32 |
+
|
| 33 |
if start_positions is not None and end_positions is not None:
|
| 34 |
|
| 35 |
loss_fct = nn.CrossEntropyLoss()
|
| 36 |
+
|
| 37 |
+
if len(start_positions.size()) > 1:
|
| 38 |
+
start_positions = start_positions.squeeze(-1)
|
| 39 |
+
if len(end_positions.size()) > 1:
|
| 40 |
+
end_positions = end_positions.squeeze(-1)
|
| 41 |
+
|
| 42 |
+
start_loss = loss_fct(start_logits, start_positions)
|
| 43 |
+
end_loss = loss_fct(end_logits, end_positions)
|
| 44 |
+
# start_loss = loss_fct(start_logits[index], start_positions[index][0])
|
| 45 |
+
# end_loss = loss_fct(end_logits[index], end_positions[index][0])
|
| 46 |
total_loss = (start_loss + end_loss) / 2
|
| 47 |
+
|
| 48 |
+
|
| 49 |
|
| 50 |
|
| 51 |
return {
|