uuuhjb committed
Commit 2581d07 · 1 Parent(s): 93f9e60

update push_to_hf

Files changed (1):
  1. submission.py +110 -0
submission.py CHANGED
@@ -333,6 +333,100 @@ def update_leaderboard_data(
     return False


+# ---------------------------------------------------------------------------
+# HuggingFace submission push
+# ---------------------------------------------------------------------------
+
+HF_SUBMISSIONS_DATASET = "AMA-bench/AMA_submissions_internal"
+
+
+def push_submission_to_hf(
+    submissions: List[dict],
+    metadata: dict,
+    score_dict: Dict,
+    token: str,
+    timestamp: str,
+) -> Tuple[bool, str]:
+    """
+    Push raw submission + metadata + scores to the private HuggingFace dataset
+    ``AMA-bench/AMA_submissions_internal``.
+
+    The dataset is expected to already exist with a single ``data`` config.
+    Each call appends one row per episode, writing a Parquet shard named
+    ``{organisation}_{model}_{timestamp}.parquet`` under ``data/``.
+
+    Row schema
+    ----------
+    submission_id           : str – "{organisation}_{model}_{timestamp}"
+    organisation            : str
+    model_name              : str
+    submission_type         : str – "agent" | "model"
+    timestamp               : str – "YYYYMMDD_HHMMSS"
+    date                    : str – "YYYY-MM-DD"
+    episode_id              : str
+    question_uuid_list      : str – JSON-encoded list
+    answer_list             : str – JSON-encoded list
+    llm_as_judge_score_list : str – JSON-encoded list
+    reasoning_trace         : str – optional, empty string if absent
+    score_json              : str – JSON-encoded per-domain score dict
+    metadata_json           : str – JSON-encoded full metadata dict
+    """
+    try:
+        import io
+        import pandas as pd
+        from huggingface_hub import HfApi
+
+        api = HfApi(token=token)
+        organisation = metadata.get("organisation", "unknown")
+        model_name = metadata.get("model", metadata.get("agent_name", "unknown"))
+        submission_id = f"{organisation}_{model_name}_{timestamp}"
+
+        # Build one row per episode submission
+        rows = []
+        for sub in submissions:
+            rows.append({
+                "submission_id": submission_id,
+                "organisation": organisation,
+                "model_name": model_name,
+                "submission_type": metadata.get("submission_type", ""),
+                "timestamp": timestamp,
+                "date": metadata.get("Date", ""),
+                "episode_id": str(sub.get("episode_id", "")),
+                "question_uuid_list": json.dumps(sub.get("question_uuid_list", []), ensure_ascii=False),
+                "answer_list": json.dumps(sub.get("answer_list", []), ensure_ascii=False),
+                "llm_as_judge_score_list": json.dumps(sub.get("llm_as_judge_score_list", []), ensure_ascii=False),
+                "reasoning_trace": str(sub.get("reasoning_trace", "")),
+                "score_json": json.dumps(score_dict, ensure_ascii=False),
+                "metadata_json": json.dumps(metadata, ensure_ascii=False),
+            })
+
+        df = pd.DataFrame(rows)
+
+        # Serialise to Parquet in memory (no temp file needed)
+        buf = io.BytesIO()
+        df.to_parquet(buf, index=False)
+        buf.seek(0)
+
+        # Upload as a new shard under data/
+        path_in_repo = f"data/{submission_id}.parquet"
+        api.upload_file(
+            path_or_fileobj=buf,
+            path_in_repo=path_in_repo,
+            repo_id=HF_SUBMISSIONS_DATASET,
+            repo_type="dataset",
+            commit_message=f"Add submission: {submission_id}",
+        )
+
+        print(f"[hf_push] Pushed {len(rows)} row(s) to {HF_SUBMISSIONS_DATASET}/{path_in_repo}")
+        return True, submission_id
+
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        print(f"[hf_push] ERROR: {e}")
+        return False, str(e)
+
+
 # ---------------------------------------------------------------------------
 # Main entry point
 # ---------------------------------------------------------------------------
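
For reference, a minimal usage sketch of push_submission_to_hf. All payload values below are hypothetical and only illustrate the shapes the function expects; a real call would pass the parsed submission file, its metadata, and the computed scores, and the target dataset is assumed to already exist.

# Hypothetical payloads; every value is illustrative only.
submissions = [{
    "episode_id": "ep_001",
    "question_uuid_list": ["q-uuid-1", "q-uuid-2"],
    "answer_list": ["first answer", "second answer"],
    "llm_as_judge_score_list": [1, 0],
    "reasoning_trace": "optional trace text",
}]
metadata = {
    "organisation": "ExampleOrg",
    "model": "example-model",
    "submission_type": "model",
    "Date": "2024-01-01",
}
score_dict = {"overall": 0.5}

ok, result = push_submission_to_hf(
    submissions=submissions,
    metadata=metadata,
    score_dict=score_dict,
    token="hf_...",               # a token with write access to the dataset
    timestamp="20240101_120000",  # "YYYYMMDD_HHMMSS", as in the docstring
)
# On success, ok is True and result is the submission_id
# ("ExampleOrg_example-model_20240101_120000"); on failure, result is the error string.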
 
@@ -397,6 +491,22 @@ def add_new_submission(
     with open(f"{submission_dir}/metadata_{timestamp}.json", "w", encoding="utf-8") as f_meta:
         json.dump(metadata, f_meta, indent=2, ensure_ascii=False)

+    # Push to the private HuggingFace submissions dataset
+    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("TOKEN")
+    if hf_token:
+        hf_ok, hf_result = push_submission_to_hf(
+            submissions=submissions,
+            metadata=metadata,
+            score_dict=score_dict,
+            token=hf_token,
+            timestamp=timestamp,
+        )
+        if not hf_ok:
+            print(f"[hf_push] WARNING: Push to HuggingFace failed: {hf_result}")
+            # Non-fatal: we continue even if HF push fails
+    else:
+        print("[hf_push] WARNING: No HF_TOKEN found, skipping HuggingFace push.")
+
     # Update leaderboard
     updated = update_leaderboard_data(
         model_or_agent_name=model,
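
Since each shard is plain Parquet under data/, pushed rows can be read back with pandas. A sketch, assuming read access to the private dataset; the shard filename below is hypothetical:

import json
import pandas as pd
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="AMA-bench/AMA_submissions_internal",
    repo_type="dataset",
    filename="data/ExampleOrg_example-model_20240101_120000.parquet",  # hypothetical shard
    token="hf_...",  # read token for the private repo
)
df = pd.read_parquet(path)

# List-valued columns were JSON-encoded on write, so decode them on read.
answers = json.loads(df.loc[0, "answer_list"])
scores = json.loads(df.loc[0, "score_json"])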