### This is an example of the script that will be run in the test environment.
### You can change the rest of the code to define and test your solution.
### However, you should not change the signature of the provided function.
### The script saves the "submission.json" file in the current directory.
### You can use any additional files and subdirectories to organize your code.

import json
import os
from pathlib import Path
from typing import Dict

import numpy as np
from datasets import load_dataset
from huggingface_hub import snapshot_download
from joblib import Parallel, delayed
from tqdm import tqdm

# Directory the dataset is read from (and downloaded to when it is missing).
DATA_DIR = Path('/tmp/data')


def empty_solution(sample):
    """Return a minimal valid solution, i.e. 2 vertices and 1 edge.

    Args:
        sample: Dataset entry. Not used by this placeholder.

    Returns:
        A tuple ``(vertices, edges)``: a ``(2, 3)`` array of zeros and the
        single edge ``[(0, 1)]``.
    """
    return np.zeros((2, 3)), [(0, 1)]


def predict_wireframe_safely(sample):
    """Predict a wireframe for ``sample`` and attach the sample's order id.

    The prediction currently comes from ``empty_solution``.

    Args:
        sample: Dataset entry; must provide the ``'order_id'`` key.

    Returns:
        A tuple ``(pred_vertices, pred_edges, order_id)`` where every edge is
        a pair of built-in ``int`` values.
    """
    pred_vertices, pred_edges = empty_solution(sample)
    # Cast to built-in ints to remove possible np.int64 values, which the
    # json module cannot serialize.
    pred_edges = [(int(a), int(b)) for a, b in pred_edges]
    return pred_vertices, pred_edges, sample['order_id']


class Sample(Dict):
    """Dictionary whose ``repr`` summarizes each value instead of dumping it."""

    def pick_repr_data(self, x):
        """Return a compact stand-in for ``x`` to be used in ``__repr__``.

        Objects with a ``shape`` attribute are shown as that shape, plain
        ``str``/``float``/``int`` values are shown as-is, lists are shown as
        a one-element list holding the type of their first item (or ``[]``
        when empty), and anything else is shown as its type.
        """
        if hasattr(x, 'shape'):
            return x.shape
        if isinstance(x, (str, float, int)):
            return x
        if isinstance(x, list):
            return [type(x[0])] if x else []
        return type(x)

    def __repr__(self):
        return str({k: self.pick_repr_data(v) for k, v in self.items()})


def _log_working_environment():
    """Print the working directory and the data directory listings."""
    print('pwd:')
    os.system('pwd')
    # The shell command writes its listing straight to stdout; the value
    # handed to print() is only the return value of os.system.
    print(os.system('ls -lahtr'))
    print('/tmp/data/')
    print(os.system('ls -lahtr /tmp/data/'))
    print('/tmp/data/data')
    print(os.system('ls -lahtrR /tmp/data/data'))


def _prepare_data_dir(params):
    """Return the dataset directory, downloading the dataset if it is absent.

    Args:
        params: Parsed ``params.json``; ``params['dataset']`` is the dataset
            repository id passed to ``snapshot_download``.
    """
    # NOTE(review): the download is assumed to belong to the "directory does
    # not exist" (local run) branch of the original script -- confirm.
    if not DATA_DIR.exists():
        snapshot_download(
            repo_id=params['dataset'],
            local_dir="/tmp/data",
            repo_type="dataset",
        )
    return DATA_DIR


def _collect_data_files(data_path):
    """Map each split name to the ``.tar`` shards found under ``data_path``."""
    return {
        "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
        "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
    }


def main():
    """Load the dataset, predict a wireframe per sample, write submission.json."""
    print("------------ Loading dataset------------ ")
    param_path = Path('params.json')
    print(param_path)
    with param_path.open(encoding="utf-8") as f:
        params = json.load(f)
    print(params)

    _log_working_environment()

    data_path = _prepare_data_dir(params)
    print(data_path)

    data_files = _collect_data_files(data_path)
    print(data_files)

    dataset = load_dataset(
        str(data_path / 'hoho22k_2026_test_x_anon.py'),
        data_files=data_files,
        trust_remote_code=True,
        writer_batch_size=100,
    )
    # NOTE: message kept verbatim from the original script; the dataset above
    # is loaded through the loading script inside the data directory.
    print('load with webdataset')
    print(dataset, flush=True)

    print('------------ Now you can do your solution ---------------')
    solution = []
    for subset_name in dataset:
        print(f"Predicitng on {subset_name}")
        preds = Parallel(n_jobs=-1, prefer="processes")(
            delayed(predict_wireframe_safely)(a) for a in tqdm(dataset[subset_name])
        )
        print("Converting")
        for pred_vertices, pred_edges, order_id in preds:
            print(f'{order_id}: {len(pred_vertices)} verts, {len(pred_edges)} edges')
            solution.append({
                'order_id': order_id,
                'wf_vertices': pred_vertices.tolist(),
                'wf_edges': pred_edges,
            })

    print('------------ Saving results ---------------')
    with open("submission.json", "w", encoding="utf-8") as f:
        json.dump(solution, f)
    print("------------ Done ------------ ")


if __name__ == "__main__":
    main()