Spaces:
Sleeping
Sleeping
OliverPerrin
committed on
Commit
·
e3422d2
1
Parent(s):
5c41b92
Updated Training run, fixed dataset language issue
Browse files
outputs/training_history.json
CHANGED
|
@@ -1,56 +1,56 @@
|
|
| 1 |
{
|
| 2 |
"train_epoch_1": {
|
| 3 |
-
"summarization_loss": 3.
|
| 4 |
-
"summarization_rouge_like": 0.
|
| 5 |
-
"emotion_loss": 0.
|
| 6 |
"emotion_f1": 0.08081954432030519,
|
| 7 |
-
"topic_loss": 1.
|
| 8 |
-
"topic_accuracy": 0.
|
| 9 |
-
"total_loss": 5.
|
| 10 |
},
|
| 11 |
"val_epoch_1": {
|
| 12 |
-
"summarization_loss": 3.
|
| 13 |
-
"summarization_rouge_like": 0.
|
| 14 |
-
"emotion_loss": 0.
|
| 15 |
"emotion_f1": 0.08195805028080941,
|
| 16 |
-
"topic_loss": 1.
|
| 17 |
-
"topic_accuracy": 0.
|
| 18 |
-
"total_loss": 5.
|
| 19 |
},
|
| 20 |
"train_epoch_2": {
|
| 21 |
-
"summarization_loss": 3.
|
| 22 |
-
"summarization_rouge_like": 0.
|
| 23 |
-
"emotion_loss": 0.
|
| 24 |
"emotion_f1": 0.08081954436997572,
|
| 25 |
-
"topic_loss": 1.
|
| 26 |
-
"topic_accuracy": 0.
|
| 27 |
-
"total_loss": 5.
|
| 28 |
},
|
| 29 |
"val_epoch_2": {
|
| 30 |
-
"summarization_loss": 3.
|
| 31 |
-
"summarization_rouge_like": 0.
|
| 32 |
-
"emotion_loss": 0.
|
| 33 |
"emotion_f1": 0.08195805028080941,
|
| 34 |
"topic_loss": 1.9172309080759684,
|
| 35 |
-
"topic_accuracy": 0.
|
| 36 |
-
"total_loss": 4.
|
| 37 |
},
|
| 38 |
"train_epoch_3": {
|
| 39 |
-
"summarization_loss": 3.
|
| 40 |
-
"summarization_rouge_like": 0.
|
| 41 |
-
"emotion_loss": 0.
|
| 42 |
"emotion_f1": 0.08081954464316368,
|
| 43 |
-
"topic_loss": 1.
|
| 44 |
-
"topic_accuracy": 0.
|
| 45 |
-
"total_loss": 4.
|
| 46 |
},
|
| 47 |
"val_epoch_3": {
|
| 48 |
-
"summarization_loss": 3.
|
| 49 |
-
"summarization_rouge_like": 0.
|
| 50 |
-
"emotion_loss": 0.
|
| 51 |
"emotion_f1": 0.08195805028080941,
|
| 52 |
-
"topic_loss": 1.
|
| 53 |
-
"topic_accuracy": 0.
|
| 54 |
-
"total_loss": 4.
|
| 55 |
}
|
| 56 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"train_epoch_1": {
|
| 3 |
+
"summarization_loss": 3.22803662776947,
|
| 4 |
+
"summarization_rouge_like": 0.1796494591813519,
|
| 5 |
+
"emotion_loss": 0.6834881261984507,
|
| 6 |
"emotion_f1": 0.08081954432030519,
|
| 7 |
+
"topic_loss": 1.9448203102747599,
|
| 8 |
+
"topic_accuracy": 0.1610000000000002,
|
| 9 |
+
"total_loss": 5.225678972204526
|
| 10 |
},
|
| 11 |
"val_epoch_1": {
|
| 12 |
+
"summarization_loss": 3.0928489446640013,
|
| 13 |
+
"summarization_rouge_like": 0.19274763643316278,
|
| 14 |
+
"emotion_loss": 0.6717268188794454,
|
| 15 |
"emotion_f1": 0.08195805028080941,
|
| 16 |
+
"topic_loss": 1.9391145865122477,
|
| 17 |
+
"topic_accuracy": 0.14111111111111113,
|
| 18 |
+
"total_loss": 5.069996466239293
|
| 19 |
},
|
| 20 |
"train_epoch_2": {
|
| 21 |
+
"summarization_loss": 3.084542968273163,
|
| 22 |
+
"summarization_rouge_like": 0.19060719636645715,
|
| 23 |
+
"emotion_loss": 0.6423822633425395,
|
| 24 |
"emotion_f1": 0.08081954436997572,
|
| 25 |
+
"topic_loss": 1.9292031232515972,
|
| 26 |
+
"topic_accuracy": 0.28200000000000003,
|
| 27 |
+
"total_loss": 5.012717924912771
|
| 28 |
},
|
| 29 |
"val_epoch_2": {
|
| 30 |
+
"summarization_loss": 3.052287793159485,
|
| 31 |
+
"summarization_rouge_like": 0.19201808441012516,
|
| 32 |
+
"emotion_loss": 0.5912736018498739,
|
| 33 |
"emotion_f1": 0.08195805028080941,
|
| 34 |
"topic_loss": 1.9172309080759684,
|
| 35 |
+
"topic_accuracy": 0.36740740740740746,
|
| 36 |
+
"total_loss": 4.89781364997228
|
| 37 |
},
|
| 38 |
"train_epoch_3": {
|
| 39 |
+
"summarization_loss": 3.042828369140625,
|
| 40 |
+
"summarization_rouge_like": 0.19310921614611898,
|
| 41 |
+
"emotion_loss": 0.5563966218630473,
|
| 42 |
"emotion_f1": 0.08081954464316368,
|
| 43 |
+
"topic_loss": 1.9150755230585734,
|
| 44 |
+
"topic_accuracy": 0.37933333333333347,
|
| 45 |
+
"total_loss": 4.834961063464482
|
| 46 |
},
|
| 47 |
"val_epoch_3": {
|
| 48 |
+
"summarization_loss": 3.050085465113322,
|
| 49 |
+
"summarization_rouge_like": 0.19260925200369436,
|
| 50 |
+
"emotion_loss": 0.5183704813321431,
|
| 51 |
"emotion_f1": 0.08195805028080941,
|
| 52 |
+
"topic_loss": 1.909103000164032,
|
| 53 |
+
"topic_accuracy": 0.37074074074074076,
|
| 54 |
+
"total_loss": 4.782192687193553
|
| 55 |
}
|
| 56 |
}
|
scripts/build_discovery_dataset.py
CHANGED
|
@@ -15,20 +15,19 @@ Preprocessing includes:
|
|
| 15 |
import json
|
| 16 |
import random
|
| 17 |
import re
|
| 18 |
-
|
| 19 |
from collections import defaultdict
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
import torch
|
| 22 |
from datasets import Dataset
|
| 23 |
from tqdm import tqdm
|
| 24 |
|
| 25 |
-
# Add project root to path
|
| 26 |
-
import sys
|
| 27 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 28 |
-
|
| 29 |
from src.inference.factory import create_inference_pipeline
|
| 30 |
|
| 31 |
-
|
| 32 |
# --------------- Text Quality Filters ---------------
|
| 33 |
|
| 34 |
# Patterns that indicate garbage/metadata text
|
|
|
|
| 15 |
import json
|
| 16 |
import random
|
| 17 |
import re
|
| 18 |
+
import sys
|
| 19 |
from collections import defaultdict
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
# Add project root to path
|
| 23 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 24 |
|
| 25 |
import torch
|
| 26 |
from datasets import Dataset
|
| 27 |
from tqdm import tqdm
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
from src.inference.factory import create_inference_pipeline
|
| 30 |
|
|
|
|
| 31 |
# --------------- Text Quality Filters ---------------
|
| 32 |
|
| 33 |
# Patterns that indicate garbage/metadata text
|