Upload train.py
Browse files
train.py
CHANGED
|
@@ -166,11 +166,11 @@ def add_image_path(example):
|
|
| 166 |
folder_name=example["folder_name"]
|
| 167 |
#image_name = example['image_id'] + '.jpg'
|
| 168 |
#image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
|
| 169 |
-
image_path = [os.path.join(rf"
|
| 170 |
example['image_path'] = image_path
|
| 171 |
return example
|
| 172 |
|
| 173 |
-
ds = dataset.map(add_image_path, batched=True, batch_size=
|
| 174 |
print(ds)
|
| 175 |
|
| 176 |
ds = ds.train_test_split(test_size=0.02)
|
|
@@ -180,6 +180,8 @@ processed_dataset = ds.map(
|
|
| 180 |
function=preprocess_fn,
|
| 181 |
batched=True,
|
| 182 |
fn_kwargs={"max_target_length": 128},
|
|
|
|
|
|
|
| 183 |
#remove_columns=ds['train'].column_names
|
| 184 |
)
|
| 185 |
|
|
|
|
| 166 |
folder_name=example["folder_name"]
|
| 167 |
#image_name = example['image_id'] + '.jpg'
|
| 168 |
#image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
|
| 169 |
+
image_path = [os.path.join(rf"/home/user/dump_small/{folder_name[i]}", image_name[i]) for i in range(len(image_name))]
|
| 170 |
example['image_path'] = image_path
|
| 171 |
return example
|
| 172 |
|
| 173 |
+
ds = dataset.map(add_image_path, batched=True, batch_size=8192)["train"]
|
| 174 |
print(ds)
|
| 175 |
|
| 176 |
ds = ds.train_test_split(test_size=0.02)
|
|
|
|
| 180 |
function=preprocess_fn,
|
| 181 |
batched=True,
|
| 182 |
fn_kwargs={"max_target_length": 128},
|
| 183 |
+
batch_size=8192,
|
| 184 |
+
num_proc=16,
|
| 185 |
#remove_columns=ds['train'].column_names
|
| 186 |
)
|
| 187 |
|