Spaces:
Running
Running
Joshua Lochner
committed on
Commit
·
eaa79a8
1
Parent(s):
d34e3fe
Use logger instead of printing when loading datasets
Browse files - src/shared.py +12 -13
src/shared.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
from transformers.trainer_utils import get_last_checkpoint as glc
|
| 2 |
-
from transformers import Seq2SeqTrainingArguments, TrainingArguments
|
| 3 |
import os
|
| 4 |
from utils import re_findall
|
| 5 |
import logging
|
|
@@ -15,6 +14,17 @@ from typing import Optional
|
|
| 15 |
from dataclasses import dataclass, field
|
| 16 |
from enum import Enum
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
| 19 |
|
| 20 |
ACTION_OPTIONS = ['skip', 'mute', 'full']
|
|
@@ -234,7 +244,7 @@ def reset():
|
|
| 234 |
|
| 235 |
def load_datasets(dataset_args: DatasetArguments):
|
| 236 |
|
| 237 |
-
|
| 238 |
data_files = {}
|
| 239 |
|
| 240 |
if dataset_args.train_file is not None:
|
|
@@ -333,17 +343,6 @@ class CustomTrainingArguments(OutputArguments, AdditionalTrainingArguments):
|
|
| 333 |
pass
|
| 334 |
|
| 335 |
|
| 336 |
-
logging.basicConfig()
|
| 337 |
-
logger = logging.getLogger(__name__)
|
| 338 |
-
|
| 339 |
-
# Setup logging
|
| 340 |
-
logging.basicConfig(
|
| 341 |
-
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
| 342 |
-
datefmt='%m/%d/%Y %H:%M:%S',
|
| 343 |
-
handlers=[logging.StreamHandler(sys.stdout)],
|
| 344 |
-
)
|
| 345 |
-
|
| 346 |
-
|
| 347 |
def get_last_checkpoint(training_args):
|
| 348 |
last_checkpoint = None
|
| 349 |
if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
|
|
|
|
| 1 |
from transformers.trainer_utils import get_last_checkpoint as glc
|
|
|
|
| 2 |
import os
|
| 3 |
from utils import re_findall
|
| 4 |
import logging
|
|
|
|
| 14 |
from dataclasses import dataclass, field
|
| 15 |
from enum import Enum
|
| 16 |
|
| 17 |
+
|
| 18 |
+
logging.basicConfig()
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# Setup logging
|
| 22 |
+
logging.basicConfig(
|
| 23 |
+
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
| 24 |
+
datefmt='%m/%d/%Y %H:%M:%S',
|
| 25 |
+
handlers=[logging.StreamHandler(sys.stdout)],
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
| 29 |
|
| 30 |
ACTION_OPTIONS = ['skip', 'mute', 'full']
|
|
|
|
| 244 |
|
| 245 |
def load_datasets(dataset_args: DatasetArguments):
|
| 246 |
|
| 247 |
+
logger.info('Reading datasets')
|
| 248 |
data_files = {}
|
| 249 |
|
| 250 |
if dataset_args.train_file is not None:
|
|
|
|
| 343 |
pass
|
| 344 |
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
def get_last_checkpoint(training_args):
|
| 347 |
last_checkpoint = None
|
| 348 |
if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
|