IhorIvanyshyn01 committed on
Commit
ffa2c1d
·
1 Parent(s): 677e5b4

Fix competition config and script crash

Browse files
Files changed (2) hide show
  1. params.json +3 -3
  2. script.py +84 -69
params.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "competition_id": "usm3d/S23DR2025",
3
  "competition_type": "script",
4
  "metric": "custom",
5
  "token": "hf_******",
@@ -16,8 +16,8 @@
16
  "output_path": "/tmp/model",
17
  "submission_repo": "IhorIvanyshyn01/my-s23dr-submission",
18
  "time_limit": 7200,
19
- "dataset": "usm3d/hoho22k_2026_test_x_anon",
20
  "submission_filenames": [
21
- "submission.parquet"
22
  ]
23
  }
 
1
  {
2
+ "competition_id": "usm3d/S23DR2026",
3
  "competition_type": "script",
4
  "metric": "custom",
5
  "token": "hf_******",
 
16
  "output_path": "/tmp/model",
17
  "submission_repo": "IhorIvanyshyn01/my-s23dr-submission",
18
  "time_limit": 7200,
19
+ "dataset": "parquet",
20
  "submission_filenames": [
21
+ "submission.json"
22
  ]
23
  }
script.py CHANGED
@@ -16,7 +16,7 @@ from joblib import Parallel, delayed
16
 
17
  def empty_solution(sample):
18
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
19
- return np.zeros((2,3)), [(0, 1)], sample['order_id']
20
 
21
  def predict_wireframe_safely(sample):
22
  try:
@@ -41,7 +41,82 @@ class Sample(Dict):
41
  # return str({k: v.shape if hasattr(v, 'shape') else [type(v[0])] if isinstance(v, list) else type(v) for k,v in self.items()})
42
  return str({k: self.pick_repr_data(v) for k,v in self.items()})
43
 
44
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  import json
46
  if __name__ == "__main__":
47
  print ("------------ Loading dataset------------ ")
@@ -49,76 +124,16 @@ if __name__ == "__main__":
49
  print(param_path)
50
  with param_path.open() as f:
51
  params = json.load(f)
52
- print(params)
53
- import os
54
-
55
- print('pwd:')
56
- os.system('pwd')
57
- print(os.system('ls -lahtr'))
58
- print('/tmp/data/')
59
- print(os.system('ls -lahtr /tmp/data/'))
60
- print('/tmp/data/data')
61
- print(os.system('ls -lahtrR /tmp/data/data'))
62
-
63
 
64
- data_path_test_server = Path('/tmp/data')
65
- data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho22k_2026_test_x_anon/'
66
-
67
- if data_path_test_server.exists():
68
- # data_path = data_path_test_server
69
- TEST_ENV = True
70
- else:
71
- # data_path = data_path_local
72
- TEST_ENV = False
73
- from huggingface_hub import snapshot_download
74
- _ = snapshot_download(
75
- repo_id=params['dataset'],
76
- local_dir="/tmp/data",
77
- repo_type="dataset",
78
- )
79
- data_path = data_path_test_server
80
-
81
-
82
- print(data_path)
83
-
84
- # dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
85
- # data_files = {
86
- # "validation": [str(p) for p in [*data_path.rglob('*validation*.arrow')]+[*data_path.rglob('*public*/**/*.tar')]],
87
- # "test": [str(p) for p in [*data_path.rglob('*test*.arrow')]+[*data_path.rglob('*private*/**/*.tar')]],
88
- # }
89
- data_files = {
90
- "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
91
- "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
92
- }
93
- print(data_files)
94
- dataset = load_dataset(
95
- str(data_path / 'hoho22k_2026_test_x_anon.py'),
96
- data_files=data_files,
97
- trust_remote_code=True,
98
- writer_batch_size=100
99
- )
100
 
101
- # if TEST_ENV:
102
- # dataset = load_dataset(
103
- # "webdataset",
104
- # data_files=data_files,
105
- # trust_remote_code=True,
106
- # # streaming=True
107
- # )
108
- print('load with webdataset')
109
- # else:
110
-
111
- # dataset = load_dataset(
112
- # "arrow",
113
- # data_files=data_files,
114
- # trust_remote_code=True,
115
- # # streaming=True
116
- # )
117
- # print('load with arrow')
118
-
119
-
120
  print(dataset, flush=True)
121
- # dataset = load_dataset('webdataset', data_files={)
122
 
123
  print('------------ Now you can do your solution ---------------')
124
  solution = []
 
16
 
17
  def empty_solution(sample):
18
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
19
+ return np.zeros((2,3)), [(0, 1)]
20
 
21
  def predict_wireframe_safely(sample):
22
  try:
 
41
  # return str({k: v.shape if hasattr(v, 'shape') else [type(v[0])] if isinstance(v, list) else type(v) for k,v in self.items()})
42
  return str({k: self.pick_repr_data(v) for k,v in self.items()})
43
 
44
+ def load_competition_dataset(params):
45
+ """
46
+ Loads dataset both:
47
+ 1. Locally from public parquet files.
48
+ 2. In official competition environment from /tmp/data.
49
+ """
50
+ import os
51
+
52
+ data_path = Path("/tmp/data")
53
+
54
+ print("------------ Dataset path check ------------")
55
+ print("pwd:")
56
+ os.system("pwd")
57
+
58
+ print("/tmp/data:")
59
+ os.system("ls -lahtr /tmp/data || true")
60
+
61
+ print("/tmp/data/data:")
62
+ os.system("ls -lahtr /tmp/data/data || true")
63
+
64
+ # Case 1: local debugging with public parquet dataset
65
+ parquet_dir = data_path / "data"
66
+ train_parquet = list(parquet_dir.glob("train-*.parquet"))
67
+ val_parquet = list(parquet_dir.glob("validation-*.parquet"))
68
+
69
+ if len(train_parquet) > 0 or len(val_parquet) > 0:
70
+ print("Loading local/public parquet dataset")
71
+
72
+ data_files = {}
73
+
74
+ if len(train_parquet) > 0:
75
+ data_files["train"] = str(parquet_dir / "train-*.parquet")
76
+
77
+ if len(val_parquet) > 0:
78
+ data_files["validation"] = str(parquet_dir / "validation-*.parquet")
79
+
80
+ dataset = load_dataset("parquet", data_files=data_files)
81
+ return dataset
82
+
83
+ # Case 2: official test environment with custom dataset script
84
+ dataset_script_candidates = list(data_path.glob("*.py"))
85
+
86
+ if len(dataset_script_candidates) > 0:
87
+ dataset_script = dataset_script_candidates[0]
88
+ print(f"Loading official dataset script: {dataset_script}")
89
+
90
+ data_files = {
91
+ "validation": [str(p) for p in data_path.rglob("*public*/**/*.tar")],
92
+ "test": [str(p) for p in data_path.rglob("*private*/**/*.tar")],
93
+ }
94
+
95
+ print("data_files:", data_files)
96
+
97
+ dataset = load_dataset(
98
+ str(dataset_script),
99
+ data_files=data_files,
100
+ trust_remote_code=True,
101
+ writer_batch_size=100,
102
+ )
103
+
104
+ return dataset
105
+
106
+ # Case 3: fallback download for local run
107
+ print("No local /tmp/data files found. Trying Hugging Face download.")
108
+
109
+ from huggingface_hub import snapshot_download
110
+
111
+ snapshot_download(
112
+ repo_id=params["dataset"],
113
+ local_dir="/tmp/data",
114
+ repo_type="dataset",
115
+ token=params.get("token", None),
116
+ )
117
+
118
+ return load_competition_dataset(params)
119
+
120
  import json
121
  if __name__ == "__main__":
122
  print ("------------ Loading dataset------------ ")
 
124
  print(param_path)
125
  with param_path.open() as f:
126
  params = json.load(f)
127
+ safe_params = dict(params)
128
+ if "token" in safe_params:
129
+ safe_params["token"] = "hf_******"
 
 
 
 
 
 
 
 
130
 
131
+ print(safe_params)
132
+
133
+ print("------------ Loading dataset ------------")
134
+ dataset = load_competition_dataset(params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  print(dataset, flush=True)
 
137
 
138
  print('------------ Now you can do your solution ---------------')
139
  solution = []