Spaces:
Runtime error
Runtime error
Add dataset metadata handling in training pipeline
Browse files
- train_model.py +4 -1
train_model.py
CHANGED
|
@@ -22,10 +22,12 @@ def run_grievance_training_pipeline():
|
|
| 22 |
|
| 23 |
|
| 24 |
print(f"[{time.strftime('%H:%M:%S')}] Loading dataset from hub: {configs.dataset_repo_id} ...", flush=True)
|
| 25 |
-
|
| 26 |
model_repo=configs.dataset_repo_id,
|
| 27 |
hf_token=configs.hf_token
|
| 28 |
)
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Print dataset splits and sizes if available
|
| 31 |
def _safe_len(split):
|
|
@@ -55,6 +57,7 @@ def run_grievance_training_pipeline():
|
|
| 55 |
train_dataset=dataset['train'],
|
| 56 |
eval_dataset=dataset['eval'],
|
| 57 |
test_dataset=dataset['test'],
|
|
|
|
| 58 |
space_repo_id=configs.space_repo_id,
|
| 59 |
hf_training_args={"hub_model_id": configs.hub_model_id},
|
| 60 |
api_endpoint=configs.api_endpoint,
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
print(f"[{time.strftime('%H:%M:%S')}] Loading dataset from hub: {configs.dataset_repo_id} ...", flush=True)
|
| 25 |
+
data = load_dataset_from_hub(
|
| 26 |
model_repo=configs.dataset_repo_id,
|
| 27 |
hf_token=configs.hf_token
|
| 28 |
)
|
| 29 |
+
dataset = data['dataset']
|
| 30 |
+
dataset_metadata = data['metadata']
|
| 31 |
|
| 32 |
# Print dataset splits and sizes if available
|
| 33 |
def _safe_len(split):
|
|
|
|
| 57 |
train_dataset=dataset['train'],
|
| 58 |
eval_dataset=dataset['eval'],
|
| 59 |
test_dataset=dataset['test'],
|
| 60 |
+
dataset_metadata= dataset_metadata,
|
| 61 |
space_repo_id=configs.space_repo_id,
|
| 62 |
hf_training_args={"hub_model_id": configs.hub_model_id},
|
| 63 |
api_endpoint=configs.api_endpoint,
|