IhorIvanyshyn01 committed on
Commit
6899728
·
1 Parent(s): 648e32c

Fix dataset script loading and add parquet fallback

Browse files
Files changed (1) hide show
  1. script.py +24 -6
script.py CHANGED
@@ -315,13 +315,19 @@ if __name__ == "__main__":
315
  )
316
 
317
  from datasets import load_dataset
318
- data_files = {
319
- "validation": [str(p) for p in data_path.rglob("*public*/**/*.tar")],
320
- "test": [str(p) for p in data_path.rglob("*private*/**/*.tar")],
321
- }
 
 
 
322
  print(f"Data files: {data_files}")
 
 
 
323
  dataset = load_dataset(
324
- str(data_path / "hoho22k_2026_test_x_anon.py"),
325
  data_files=data_files,
326
  trust_remote_code=True,
327
  writer_batch_size=100,
@@ -366,6 +372,8 @@ if __name__ == "__main__":
366
  else:
367
  try:
368
  pred_v, pred_e = predict_sample(fused, model, device)
 
 
369
 
370
  # Apply handcrafted triangulation tracking to catch missing corners/edges
371
  try:
@@ -378,8 +386,11 @@ if __name__ == "__main__":
378
  print(f" Track ensemble failed for {order_id}: {track_e_err}")
379
 
380
  except Exception as e:
381
- print(f" Predict failed for {order_id}: {e}")
 
382
  pred_v, pred_e = empty_solution()
 
 
383
 
384
  solution.append({
385
  "order_id": order_id,
@@ -399,6 +410,13 @@ if __name__ == "__main__":
399
  output_path = Path(params.get('output_path', '.'))
400
  with open(output_path / "submission.json", "w") as f:
401
  json.dump(solution, f)
 
 
 
 
 
 
 
402
 
403
  elapsed = time.time() - t_start
404
  print(f"\nDone. {processed} samples in {elapsed:.0f}s ({elapsed/max(processed,1):.1f}s/sample)")
 
315
  )
316
 
317
  from datasets import load_dataset
318
+ data_files = {}
319
+ public_tars = sorted([str(p) for p in data_path.rglob('*public*/**/*.tar')])
320
+ private_tars = sorted([str(p) for p in data_path.rglob('*private*/**/*.tar')])
321
+ if public_tars:
322
+ data_files["validation"] = public_tars
323
+ if private_tars:
324
+ data_files["test"] = private_tars
325
  print(f"Data files: {data_files}")
326
+ loading_scripts = sorted(data_path.rglob('*.py'))
327
+ loading_script = str(loading_scripts[0]) if loading_scripts else str(data_path)
328
+
329
  dataset = load_dataset(
330
+ loading_script,
331
  data_files=data_files,
332
  trust_remote_code=True,
333
  writer_batch_size=100,
 
372
  else:
373
  try:
374
  pred_v, pred_e = predict_sample(fused, model, device)
375
+ if torch.cuda.is_available():
376
+ torch.cuda.empty_cache()
377
 
378
  # Apply handcrafted triangulation tracking to catch missing corners/edges
379
  try:
 
386
  print(f" Track ensemble failed for {order_id}: {track_e_err}")
387
 
388
  except Exception as e:
389
+ import traceback
390
+ print(f" Predict failed for {order_id}:\n{traceback.format_exc()}")
391
  pred_v, pred_e = empty_solution()
392
+ if torch.cuda.is_available():
393
+ torch.cuda.empty_cache()
394
 
395
  solution.append({
396
  "order_id": order_id,
 
410
  output_path = Path(params.get('output_path', '.'))
411
  with open(output_path / "submission.json", "w") as f:
412
  json.dump(solution, f)
413
+
414
+ try:
415
+ import pandas as pd
416
+ sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
417
+ sub.to_parquet(output_path / "submission.parquet")
418
+ except Exception as e:
419
+ print(f"Failed to write parquet: {e}")
420
 
421
  elapsed = time.time() - t_start
422
  print(f"\nDone. {processed} samples in {elapsed:.0f}s ({elapsed/max(processed,1):.1f}s/sample)")