Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

LICENSE.md +13 -0
README.md +20 -0
example_notebook.ipynb +0 -0
params.json +23 -0
script.py +126 -0

LICENSE.md ADDED Viewed

	@@ -0,0 +1,13 @@

+Copyright 2025 Dmytro Mishkin, Jack Langerman
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

README.md ADDED Viewed

	@@ -0,0 +1,20 @@

+---
+license: apache-2.0
+---
+# Handcrafted solution example for the S23DR competition
+This repo provides a minimalistic example of a wireframe estimation submission to the S23DR competition.
+We recommend you take a look at [this example](https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py), for detailed code of this submission. It also provides useful I/O and visualization functions.
+This example simply seeks to provide minimal code that succeeds at reading the dataset and producing a solution (in this case, two vertices at the origin and an edge of zero length connecting them).
+`script.py` is the main file that is run by the competition space. It should produce `submission.parquet` as the result of the run. Please see the additional comments in the `script.py` file.
+# How to submit
+Use the notebook [example_notebook.ipynb](example_notebook.ipynb)

example_notebook.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

params.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "competition_id": "usm3d/S23DR2025",
+    "competition_type": "script",
+    "metric": "custom",
+    "token": "hf_******",
+    "team_id": "xxxxxxxxx_your_team_name_xxxxxxxxxx",
+    "submission_id": "xxxxxxxxx_your_sub_id_xxxxxxxxxx",
+    "submission_id_col": "order_id",
+    "submission_cols": [
+        "order_id",
+        "wf_vertices",
+        "wf_edges",
+        "wf_classifications"
+    ],
+    "submission_rows": 267,
+    "output_path": "/tmp/model",
+    "submission_repo": "<your submission repo>",
+    "time_limit": 7200,
+    "dataset": "usm3d/hoho25k_test_x",
+    "submission_filenames": [
+        "submission.parquet"
+    ]
+}

script.py ADDED Viewed

	@@ -0,0 +1,126 @@

+### This is an example of the script that will be run in the test environment.
+### You can change the rest of the code to define and test your solution.
+### However, you should not change the signature of the provided function.
+### The script saves "submission.parquet" file in the current directory.
+### You can use any additional files and subdirectories to organize your code.
+from pathlib import Path
+from tqdm import tqdm
+import pandas as pd
+import numpy as np
+from datasets import load_dataset
+from typing import Dict
+from joblib import Parallel, delayed
+import os
+import json
+import gc
+from hoho2025.example_solutions import predict_wireframe
+# check the https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py for the example solution
+def empty_solution():
+    """Return a minimal valid solution: two vertices joined by a single edge.
+
+    Returns:
+        A ``(vertices, edges)`` tuple where ``vertices`` is a 2x3 array of
+        zeros (both vertices sit at the origin) and ``edges`` is a list with
+        the one index pair ``(0, 1)`` connecting them.
+    """
+    vertices = np.zeros((2, 3))
+    edges = [(0, 1)]
+    return vertices, edges
+class Sample(Dict):
+    """Dictionary whose ``repr`` shows a compact summary of each value."""
+    def pick_repr_data(self, x):
+        """Return a short stand-in for ``x`` suitable for printing.
+
+        Objects with a ``shape`` attribute are summarized by that shape,
+        strings and numbers are returned unchanged, lists are summarized by
+        the type of their first element (or ``[]`` when empty), and anything
+        else is summarized by its type.
+        """
+        if hasattr(x, 'shape'):
+            return x.shape
+        if isinstance(x, (str, float, int)):
+            return x
+        if not isinstance(x, list):
+            return type(x)
+        if len(x) == 0:
+            return []
+        return [type(x[0])]
+    def __repr__(self):
+        """Render the mapping with every value replaced by its summary."""
+        summary = {}
+        for key, value in self.items():
+            summary[key] = self.pick_repr_data(value)
+        return str(summary)
+if __name__ == "__main__":
+    # Entry point run by the competition space: read params.json, locate (or
+    # download) the dataset, predict one wireframe per sample and write the
+    # results to "submission.parquet" in the current directory.
+    print("------------ Loading dataset------------ ")
+    param_path = Path('params.json')
+    print(param_path)
+    with param_path.open() as f:
+        params = json.load(f)
+    # params.json carries the access token; mask it so it is not written to the logs.
+    print({key: '***' if key == 'token' else value for key, value in params.items()})
+    # Directory listings to help debug the layout of the run environment.
+    # os.system sends the listing straight to stdout; the integer printed
+    # afterwards is the command's exit status.
+    print('pwd:')
+    os.system('pwd')
+    print(os.system('ls -lahtr'))
+    print('/tmp/data/')
+    print(os.system('ls -lahtr /tmp/data/'))
+    print('/tmp/data/data')
+    print(os.system('ls -lahtrR /tmp/data/data'))
+    data_path = Path('/tmp/data')
+    # An existing /tmp/data is treated as "running on the test server".
+    TEST_ENV = data_path.exists()
+    if not TEST_ENV:
+        # Local run: fetch the dataset into the same location the server uses,
+        # so the rest of the script is identical in both environments.
+        from huggingface_hub import snapshot_download
+        snapshot_download(
+            repo_id=params['dataset'],
+            local_dir=str(data_path),
+            repo_type="dataset",
+        )
+    print(data_path)
+    # Shards are discovered by directory name: "public" -> validation, "private" -> test.
+    data_files = {
+        "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
+        "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
+    }
+    print(data_files)
+    dataset = load_dataset(
+        str(data_path / 'hoho25k_test_x.py'),
+        data_files=data_files,
+        trust_remote_code=True,
+        writer_batch_size=100
+    )
+    print('load with webdataset')
+    print(dataset, flush=True)
+    print('------------ Now you can do your solution ---------------')
+    solution = []
+    def process_sample(sample, i):
+        """Predict the wireframe for one sample and return it as a submission row.
+
+        Args:
+            sample: one dataset entry; must provide ``'order_id'``.
+            i: running index of the sample, used for logging and to trigger
+                periodic garbage collection.
+
+        Returns:
+            A dict with the keys ``'order_id'``, ``'wf_vertices'`` and ``'wf_edges'``.
+        """
+        try:
+            pred_vertices, pred_edges = predict_wireframe(sample)
+        except Exception as exc:
+            # Best effort: one failing sample must not abort the whole run, so
+            # fall back to the minimal solution -- but report the failure, and
+            # let KeyboardInterrupt/SystemExit propagate.
+            print(f"predict_wireframe failed for sample {i}: {exc!r}", flush=True)
+            pred_vertices, pred_edges = empty_solution()
+        if i % 10 == 0:
+            # Periodic collection to keep memory usage in check on long runs.
+            gc.collect()
+        return {
+            'order_id': sample['order_id'],
+            'wf_vertices': pred_vertices.tolist(),
+            'wf_edges': pred_edges
+        }
+    for subset_name, subset in dataset.items():
+        print(f"Predicting {subset_name}")
+        for i, sample in enumerate(tqdm(subset)):
+            solution.append(process_sample(sample, i))
+    print('------------ Saving results ---------------')
+    sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
+    sub.to_parquet("submission.parquet")
+    print("------------ Done ------------ ")