mr-kush committed on
Commit
625fb41
·
1 Parent(s): 100c22a

Add dataset metadata handling in training pipeline

Browse files
Files changed (1) hide show
  1. train_model.py +4 -1
train_model.py CHANGED
@@ -22,10 +22,12 @@ def run_grievance_training_pipeline():
22
 
23
 
24
  print(f"[{time.strftime('%H:%M:%S')}] Loading dataset from hub: {configs.dataset_repo_id} ...", flush=True)
25
- dataset = load_dataset_from_hub(
26
  model_repo=configs.dataset_repo_id,
27
  hf_token=configs.hf_token
28
  )
 
 
29
 
30
  # Print dataset splits and sizes if available
31
  def _safe_len(split):
@@ -55,6 +57,7 @@ def run_grievance_training_pipeline():
55
  train_dataset=dataset['train'],
56
  eval_dataset=dataset['eval'],
57
  test_dataset=dataset['test'],
 
58
  space_repo_id=configs.space_repo_id,
59
  hf_training_args={"hub_model_id": configs.hub_model_id},
60
  api_endpoint=configs.api_endpoint,
 
22
 
23
 
24
  print(f"[{time.strftime('%H:%M:%S')}] Loading dataset from hub: {configs.dataset_repo_id} ...", flush=True)
25
+ data = load_dataset_from_hub(
26
  model_repo=configs.dataset_repo_id,
27
  hf_token=configs.hf_token
28
  )
29
+ dataset = data['dataset']
30
+ dataset_metadata = data['metadata']
31
 
32
  # Print dataset splits and sizes if available
33
  def _safe_len(split):
 
57
  train_dataset=dataset['train'],
58
  eval_dataset=dataset['eval'],
59
  test_dataset=dataset['test'],
60
+ dataset_metadata= dataset_metadata,
61
  space_repo_id=configs.space_repo_id,
62
  hf_training_args={"hub_model_id": configs.hub_model_id},
63
  api_endpoint=configs.api_endpoint,