jacklangerman committed on
Commit
526367e
·
1 Parent(s): 0588073
Files changed (1) hide show
  1. script.py +10 -6
script.py CHANGED
@@ -52,8 +52,11 @@ if __name__ == "__main__":
52
 
53
  if data_path_test_server.exists():
54
  data_path = data_path_test_server
 
55
  else:
56
  data_path = data_path_local
 
 
57
 
58
  print(data_path)
59
 
@@ -65,22 +68,23 @@ if __name__ == "__main__":
65
  print(data_files)
66
 
67
 
68
- try:
69
  dataset = load_dataset(
70
- "arrow",
71
  data_files=data_files,
72
  trust_remote_code=True,
73
  # streaming=True
74
  )
75
- print('load with arrow')
76
- except:
77
  dataset = load_dataset(
78
- "webdataset",
79
  data_files=data_files,
80
  trust_remote_code=True,
81
  # streaming=True
82
  )
83
- print('load with webdataset')
 
84
 
85
  print(dataset, flush=True)
86
  # dataset = load_dataset('webdataset', data_files={)
 
52
 
53
  if data_path_test_server.exists():
54
  data_path = data_path_test_server
55
+ TEST_ENV = True
56
  else:
57
  data_path = data_path_local
58
+ TEST_ENV = False
59
+
60
 
61
  print(data_path)
62
 
 
68
  print(data_files)
69
 
70
 
71
+ if TEST_ENV:
72
  dataset = load_dataset(
73
+ "webdataset",
74
  data_files=data_files,
75
  trust_remote_code=True,
76
  # streaming=True
77
  )
78
+ print('load with webdataset')
79
+ else:
80
  dataset = load_dataset(
81
+ "arrow",
82
  data_files=data_files,
83
  trust_remote_code=True,
84
  # streaming=True
85
  )
86
+ print('load with arrow')
87
+
88
 
89
  print(dataset, flush=True)
90
  # dataset = load_dataset('webdataset', data_files={)