Siddharth63 committed on
Commit
3114e37
·
1 Parent(s): 608ff23

Update ul2_tasks.py

Browse files
Files changed (1) hide show
  1. ul2_tasks.py +0 -47
ul2_tasks.py CHANGED
@@ -62,9 +62,6 @@ def target_to_key(x, key_map, target_key):
62
  return {**key_map, target_key: x}
63
 
64
 
65
- dataset_shapes = {"train": dataset["train"].num_rows,
66
- "validation": dataset["validation"].num_rows}
67
-
68
  TaskRegistry.add(
69
  "pretrain_medical_ul2",
70
  source=seqio.FunctionDataSource(
@@ -109,47 +106,3 @@ TaskRegistry.add(
109
  },
110
  metric_fns=[metrics.accuracy],
111
  )
112
-
113
- # dataset_name = "gs://medical-siddharth/medical_data"
114
- # dataset_params = {"from_disk_path": dataset_name}
115
-
116
- # if "from_disk_path" in dataset_params:
117
- # dataset = load_from_disk(dataset_params.get("from_disk_path"))
118
- # else:
119
- # dataset = load_dataset(**dataset_params)
120
-
121
- # dataset_shapes = {"train": dataset["train"].num_rows,
122
- # "validation": dataset["validation"].num_rows}
123
-
124
- # TaskRegistry.add(
125
- # "pretrain_medical_ul2",
126
- # source=seqio.FunctionDataSource(
127
- # dataset_fn=functools.partial(dataset_fn, dataset=dataset),
128
- # splits=("train", "validation"),
129
- # caching_permitted=False,
130
- # num_input_examples=dataset_shapes,
131
- # ),
132
- # preprocessors=[
133
- # functools.partial(
134
- # target_to_key, key_map={
135
- # "inputs": None,
136
- # "targets": None,
137
- # }, target_key="targets"),
138
- # seqio.preprocessors.tokenize,
139
- # functools.partial(
140
- # ul2_objective,
141
- # shard_ds=False,
142
- # use_prefix_lm_task=True, # use S-denoising
143
- # rates=[0.4 / len(R_DENOISER_SPAN_LENGTHS)]*len(R_DENOISER_SPAN_LENGTHS) + [
144
- # 0.4 / len(X_DENOISER_SPAN_LENGTHS)]*len(X_DENOISER_SPAN_LENGTHS) + [0.2], # equal total 40% rate for both R- and X-denoisers + 20% for S-denoising (suggested at the paper chapter 4.5)
145
- # mean_noise_span_lengths=R_DENOISER_SPAN_LENGTHS + X_DENOISER_SPAN_LENGTHS,
146
- # noise_densities=R_DENOISER_CORRUPT_RATES + X_DENOISER_CORRUPT_RATES,
147
- # optional_task_prefixes=[R_DENOISER_TOKEN_PREFIX]*len(R_DENOISER_SPAN_LENGTHS) + [
148
- # X_DENOISER_TOKEN_PREFIX]*len(X_DENOISER_SPAN_LENGTHS) + [S_DENOISER_TOKEN_PREFIX],
149
- # reserved_for_packing=1, # make room for task prefix token
150
- # ),
151
- # seqio.preprocessors.append_eos_after_trim,
152
- # ],
153
- # output_features={"targets": DEFAULT_OUTPUT_FEATURES["targets"]},
154
- # metric_fns=[metrics.accuracy]
155
- # )
 
62
  return {**key_map, target_key: x}
63
 
64
 
 
 
 
65
  TaskRegistry.add(
66
  "pretrain_medical_ul2",
67
  source=seqio.FunctionDataSource(
 
106
  },
107
  metric_fns=[metrics.accuracy],
108
  )