"""Smoke-test the local solution script against ground truth using HSS.

Loads a local parquet shard, runs ``script.predict_wireframe_safely`` on the
first few samples, reads the ground-truth wireframe out of each sample's
embedded ZIP archive and prints the per-sample and average HSS score.
"""
import io
import itertools
import json
import os
import sys
import tarfile
import zipfile

import numpy as np
from datasets import load_dataset
from hoho2025.metric_helper import hss

# Import the solution script locally
import script

# Local parquet shard that is loaded as the "train" split.
TRAIN_PARQUET = "/tmp/data/data/train-00000-of-00002.parquet"
# Just test the first few samples.
MAX_SAMPLES = 5


def _load_ground_truth(zip_bytes):
    """Return ``(gt_vertices, gt_edges)`` read from an in-memory ZIP archive.

    Each element is the array stored under ``gt_vertices.npy`` /
    ``gt_edges.npy``, or ``None`` when that member is absent from the archive.
    Errors raised by ``zipfile`` or ``np.load`` propagate to the caller.
    """
    with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf:
        members = set(zf.namelist())  # list the archive once for both lookups
        gt_vertices = None
        gt_edges = None
        if 'gt_vertices.npy' in members:
            gt_vertices = np.load(io.BytesIO(zf.read('gt_vertices.npy')))
        if 'gt_edges.npy' in members:
            gt_edges = np.load(io.BytesIO(zf.read('gt_edges.npy')))
    return gt_vertices, gt_edges


print("Loading dataset from local parquet...")
dataset = load_dataset('parquet', data_files={"train": TRAIN_PARQUET})
print(f"Loaded {len(dataset['train'])} examples.")

scores = []
attempted = 0

# islice stops after MAX_SAMPLES items, so no extra sample is fetched from
# the dataset just to be thrown away.
for idx, sample in enumerate(itertools.islice(dataset['train'], MAX_SAMPLES)):
    attempted += 1
    order_id = sample.get('order_id', str(idx))
    print(f"\n--- Testing order_id: {order_id} ---")

    # 1. Evaluate prediction
    # This will likely fall back to empty_solution due to missing 'gestalt'
    pred_v, pred_e, _ = script.predict_wireframe_safely(sample)

    # 2. Extract Ground Truth from ZIP
    # Best-effort: a sample whose archive cannot be read is reported and
    # skipped rather than aborting the whole run.
    try:
        gt_v, gt_e = _load_ground_truth(sample['data'])
    except Exception as e:
        print(f"Failed to read ZIP contents for GT: {e}")
        continue

    if gt_v is None or gt_e is None:
        print("Missing ground truth for this sample.")
        continue

    # 3. Compute HSS metric Score
    res = hss(pred_v, pred_e, gt_v, gt_e)
    scores.append(res.hss)

    print(f"Predicted Vertices: {len(pred_v)} | Predicted Edges: {len(pred_e)}")
    print(f"GT Vertices: {len(gt_v)} | GT Edges: {len(gt_e)}")
    print(f"HSS Score: {res.hss:.4f}")

# Skipped samples are excluded from the average; report how many were actually
# scored so an average of 0 over zero samples is not mistaken for a real score.
print(f"\nScored {len(scores)} of {attempted} samples.")
avg_score = sum(scores) / len(scores) if scores else 0
print(f"\nAverage HSS Score on subset: {avg_score:.4f}")