Commit ·
6899728
1
Parent(s): 648e32c
Fix dataset script loading and add parquet fallback
Browse files
script.py
CHANGED
|
@@ -315,13 +315,19 @@ if __name__ == "__main__":
|
|
| 315 |
)
|
| 316 |
|
| 317 |
from datasets import load_dataset
|
| 318 |
-
data_files = {
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
| 322 |
print(f"Data files: {data_files}")
|
|
|
|
|
|
|
|
|
|
| 323 |
dataset = load_dataset(
|
| 324 |
-
|
| 325 |
data_files=data_files,
|
| 326 |
trust_remote_code=True,
|
| 327 |
writer_batch_size=100,
|
|
@@ -366,6 +372,8 @@ if __name__ == "__main__":
|
|
| 366 |
else:
|
| 367 |
try:
|
| 368 |
pred_v, pred_e = predict_sample(fused, model, device)
|
|
|
|
|
|
|
| 369 |
|
| 370 |
# Apply handcrafted triangulation tracking to catch missing corners/edges
|
| 371 |
try:
|
|
@@ -378,8 +386,11 @@ if __name__ == "__main__":
|
|
| 378 |
print(f" Track ensemble failed for {order_id}: {track_e_err}")
|
| 379 |
|
| 380 |
except Exception as e:
|
| 381 |
-
|
|
|
|
| 382 |
pred_v, pred_e = empty_solution()
|
|
|
|
|
|
|
| 383 |
|
| 384 |
solution.append({
|
| 385 |
"order_id": order_id,
|
|
@@ -399,6 +410,13 @@ if __name__ == "__main__":
|
|
| 399 |
output_path = Path(params.get('output_path', '.'))
|
| 400 |
with open(output_path / "submission.json", "w") as f:
|
| 401 |
json.dump(solution, f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
elapsed = time.time() - t_start
|
| 404 |
print(f"\nDone. {processed} samples in {elapsed:.0f}s ({elapsed/max(processed,1):.1f}s/sample)")
|
|
|
|
| 315 |
)
|
| 316 |
|
| 317 |
from datasets import load_dataset
|
| 318 |
+
data_files = {}
|
| 319 |
+
public_tars = sorted([str(p) for p in data_path.rglob('*public*/**/*.tar')])
|
| 320 |
+
private_tars = sorted([str(p) for p in data_path.rglob('*private*/**/*.tar')])
|
| 321 |
+
if public_tars:
|
| 322 |
+
data_files["validation"] = public_tars
|
| 323 |
+
if private_tars:
|
| 324 |
+
data_files["test"] = private_tars
|
| 325 |
print(f"Data files: {data_files}")
|
| 326 |
+
loading_scripts = sorted(data_path.rglob('*.py'))
|
| 327 |
+
loading_script = str(loading_scripts[0]) if loading_scripts else str(data_path)
|
| 328 |
+
|
| 329 |
dataset = load_dataset(
|
| 330 |
+
loading_script,
|
| 331 |
data_files=data_files,
|
| 332 |
trust_remote_code=True,
|
| 333 |
writer_batch_size=100,
|
|
|
|
| 372 |
else:
|
| 373 |
try:
|
| 374 |
pred_v, pred_e = predict_sample(fused, model, device)
|
| 375 |
+
if torch.cuda.is_available():
|
| 376 |
+
torch.cuda.empty_cache()
|
| 377 |
|
| 378 |
# Apply handcrafted triangulation tracking to catch missing corners/edges
|
| 379 |
try:
|
|
|
|
| 386 |
print(f" Track ensemble failed for {order_id}: {track_e_err}")
|
| 387 |
|
| 388 |
except Exception as e:
|
| 389 |
+
import traceback
|
| 390 |
+
print(f" Predict failed for {order_id}:\n{traceback.format_exc()}")
|
| 391 |
pred_v, pred_e = empty_solution()
|
| 392 |
+
if torch.cuda.is_available():
|
| 393 |
+
torch.cuda.empty_cache()
|
| 394 |
|
| 395 |
solution.append({
|
| 396 |
"order_id": order_id,
|
|
|
|
| 410 |
output_path = Path(params.get('output_path', '.'))
|
| 411 |
with open(output_path / "submission.json", "w") as f:
|
| 412 |
json.dump(solution, f)
|
| 413 |
+
|
| 414 |
+
try:
|
| 415 |
+
import pandas as pd
|
| 416 |
+
sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
|
| 417 |
+
sub.to_parquet(output_path / "submission.parquet")
|
| 418 |
+
except Exception as e:
|
| 419 |
+
print(f"Failed to write parquet: {e}")
|
| 420 |
|
| 421 |
elapsed = time.time() - t_start
|
| 422 |
print(f"\nDone. {processed} samples in {elapsed:.0f}s ({elapsed/max(processed,1):.1f}s/sample)")
|