Upload model

Files changed (3)

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "ekolasky/CustomLEDForQAonNQ",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "architectures": [
@@ -14,6 +14,9 @@
     1024,
     1024
   ],
   "bos_token_id": 0,
   "classif_dropout": 0.0,
   "classifier_dropout": 0.0,

 {
+  "_name_or_path": "allenai/led-base-16384",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "architectures": [
     1024,
     1024
   ],
+  "auto_map": {
+    "AutoModel": "modeling_CustomLEDForQA.CustomLEDForQAModel"
+  },
   "bos_token_id": 0,
   "classif_dropout": 0.0,
   "classifier_dropout": 0.0,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29b6ebfc6c38c8764ab7440f03873a73b5f3eb75667bcd2e4a50df3d2b6c4e33
 size 417405656

 version https://git-lfs.github.com/spec/v1
+oid sha256:7de0c365541e0d36d3fe80cb356e4da80915c6c57c12e9caff204628517f478a
 size 417405656

modeling_CustomLEDForQA.py CHANGED Viewed

@@ -29,12 +29,23 @@ class CustomLEDForQAModel(LEDPreTrainedModel):
         end_logits = end_logits.squeeze(-1).contiguous()
         total_loss = None
         if start_positions is not None and end_positions is not None:
             loss_fct = nn.CrossEntropyLoss()
-            start_loss = loss_fct(start_logits, start_positions[0])
-            end_loss = loss_fct(end_logits, end_positions[0])
             total_loss = (start_loss + end_loss) / 2
         return {

         end_logits = end_logits.squeeze(-1).contiguous()
         total_loss = None
         if start_positions is not None and end_positions is not None:
             loss_fct = nn.CrossEntropyLoss()
+            if len(start_positions.size()) > 1:
+                start_positions = start_positions.squeeze(-1)
+            if len(end_positions.size()) > 1:
+                end_positions = end_positions.squeeze(-1)
+            start_loss = loss_fct(start_logits, start_positions)
+            end_loss = loss_fct(end_logits, end_positions)
+            # start_loss = loss_fct(start_logits[index], start_positions[index][0])
+            # end_loss = loss_fct(end_logits[index], end_positions[index][0])
             total_loss = (start_loss + end_loss) / 2
         return {